From b1e4656e8ee3289dc5f3139fc8eb33152f96bfe6 Mon Sep 17 00:00:00 2001
From: Pavel Skripkin
Date: Tue, 24 Sep 2024 17:48:29 +0300
Subject: [PATCH 001/212] [NFC][analyzer] Make `invalidateRegions` accept
 `Stmt` instead of `Expr` (#109792)

As was reported
[here](https://github.com/llvm/llvm-project/pull/103714#pullrequestreview-2238037812),
`invalidateRegions` should accept a `Stmt` instead of an `Expr`. The
conversion is safe, because the `Expr` argument was converted back to a
`Stmt` internally anyway.

This refactoring is needed to fix another false positive involving
inline assembly. That fix will change `State->bindLoc` to
`State->invalidateRegions` inside the inline-assembly visitor, since
`bindLoc` binds only at offset 0, which is not the correct semantics
for inline assembly.
---
 .../Core/PathSensitive/ProgramState.h         |  4 +-
 .../Core/PathSensitive/SValBuilder.h          |  6 +-
 .../StaticAnalyzer/Core/PathSensitive/Store.h |  4 +-
 .../lib/StaticAnalyzer/Core/ProgramState.cpp  | 30 +++----
 clang/lib/StaticAnalyzer/Core/RegionStore.cpp | 90 ++++++++-----------
 clang/lib/StaticAnalyzer/Core/SValBuilder.cpp |  4 +-
 6 files changed, 57 insertions(+), 81 deletions(-)

diff --git a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h
index 2f6cd481fd636..eef7a54f03bf1 100644
--- a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h
+++ b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h
@@ -326,14 +326,14 @@ class ProgramState : public llvm::FoldingSetNode {
   /// \param ITraits information about special handling for particular regions
   ///        or symbols.
   [[nodiscard]] ProgramStateRef
-  invalidateRegions(ArrayRef Regions, const Expr *E,
+  invalidateRegions(ArrayRef Regions, const Stmt *S,
                     unsigned BlockCount, const LocationContext *LCtx,
                     bool CausesPointerEscape, InvalidatedSymbols *IS = nullptr,
                     const CallEvent *Call = nullptr,
                     RegionAndSymbolInvalidationTraits *ITraits = nullptr) const;

   [[nodiscard]] ProgramStateRef
-  invalidateRegions(ArrayRef Values, const Expr *E, unsigned BlockCount,
+  invalidateRegions(ArrayRef Values, const Stmt *S, unsigned BlockCount,
                     const LocationContext *LCtx, bool CausesPointerEscape,
                     InvalidatedSymbols *IS = nullptr,
                     const CallEvent *Call = nullptr,
diff --git a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SValBuilder.h b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SValBuilder.h
index 6eedaf0544559..ec2b2b2456948 100644
--- a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SValBuilder.h
+++ b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SValBuilder.h
@@ -202,11 +202,9 @@ class SValBuilder {
                                         const Expr *expr,
                                         const LocationContext *LCtx,
                                         unsigned count);
-  DefinedOrUnknownSVal conjureSymbolVal(const void *symbolTag,
-                                        const Expr *expr,
+  DefinedOrUnknownSVal conjureSymbolVal(const void *symbolTag, const Stmt *S,
                                         const LocationContext *LCtx,
-                                        QualType type,
-                                        unsigned count);
+                                        QualType type, unsigned count);
   DefinedOrUnknownSVal conjureSymbolVal(const Stmt *stmt,
                                         const LocationContext *LCtx,
                                         QualType type,
diff --git a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/Store.h b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/Store.h
index e08d5e104e9c0..332855a3c9c45 100644
--- a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/Store.h
+++ b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/Store.h
@@ -215,7 +215,7 @@ class StoreManager {
   ///
   /// \param[in] store The initial store.
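For context, a rough sketch of the follow-up fix described in the commit message above. This is hypothetical code against the analyzer's inline-assembly visitor, which is not part of this patch; `A` (the asm statement), `Pred`, and `currBldrCtx` are assumed locals of that visitor:

```cpp
// Hypothetical sketch: invalidate each output region of the inline-asm
// statement instead of binding it at offset 0 through bindLoc.
for (const Expr *O : A->outputs()) {
  SVal X = state->getSVal(O, Pred->getLocationContext());
  if (std::optional<Loc> LV = X.getAs<Loc>())
    state = state->invalidateRegions(*LV, A, currBldrCtx->blockCount(),
                                     Pred->getLocationContext(),
                                     /*CausesPointerEscape=*/true);
}
```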
/// \param[in] Values The values to invalidate. - /// \param[in] E The current statement being evaluated. Used to conjure + /// \param[in] S The current statement being evaluated. Used to conjure /// symbols to mark the values of invalidated regions. /// \param[in] Count The current block count. Used to conjure /// symbols to mark the values of invalidated regions. @@ -233,7 +233,7 @@ class StoreManager { /// even if they do not currently have bindings. Pass \c NULL if this /// information will not be used. virtual StoreRef invalidateRegions( - Store store, ArrayRef Values, const Expr *Ex, unsigned Count, + Store store, ArrayRef Values, const Stmt *S, unsigned Count, const LocationContext *LCtx, const CallEvent *Call, InvalidatedSymbols &IS, RegionAndSymbolInvalidationTraits &ITraits, InvalidatedRegions *TopLevelRegions, InvalidatedRegions *Invalidated) = 0; diff --git a/clang/lib/StaticAnalyzer/Core/ProgramState.cpp b/clang/lib/StaticAnalyzer/Core/ProgramState.cpp index e6d3399a21942..0be2709f0907d 100644 --- a/clang/lib/StaticAnalyzer/Core/ProgramState.cpp +++ b/clang/lib/StaticAnalyzer/Core/ProgramState.cpp @@ -147,30 +147,24 @@ ProgramState::bindDefaultZero(SVal loc, const LocationContext *LCtx) const { typedef ArrayRef RegionList; typedef ArrayRef ValueList; -ProgramStateRef -ProgramState::invalidateRegions(RegionList Regions, - const Expr *E, unsigned Count, - const LocationContext *LCtx, - bool CausedByPointerEscape, - InvalidatedSymbols *IS, - const CallEvent *Call, - RegionAndSymbolInvalidationTraits *ITraits) const { +ProgramStateRef ProgramState::invalidateRegions( + RegionList Regions, const Stmt *S, unsigned Count, + const LocationContext *LCtx, bool CausedByPointerEscape, + InvalidatedSymbols *IS, const CallEvent *Call, + RegionAndSymbolInvalidationTraits *ITraits) const { SmallVector Values; for (const MemRegion *Reg : Regions) Values.push_back(loc::MemRegionVal(Reg)); - return invalidateRegions(Values, E, Count, LCtx, CausedByPointerEscape, IS, + return invalidateRegions(Values, S, Count, LCtx, CausedByPointerEscape, IS, Call, ITraits); } -ProgramStateRef -ProgramState::invalidateRegions(ValueList Values, - const Expr *E, unsigned Count, - const LocationContext *LCtx, - bool CausedByPointerEscape, - InvalidatedSymbols *IS, - const CallEvent *Call, - RegionAndSymbolInvalidationTraits *ITraits) const { +ProgramStateRef ProgramState::invalidateRegions( + ValueList Values, const Stmt *S, unsigned Count, + const LocationContext *LCtx, bool CausedByPointerEscape, + InvalidatedSymbols *IS, const CallEvent *Call, + RegionAndSymbolInvalidationTraits *ITraits) const { ProgramStateManager &Mgr = getStateManager(); ExprEngine &Eng = Mgr.getOwningEngine(); @@ -186,7 +180,7 @@ ProgramState::invalidateRegions(ValueList Values, StoreManager::InvalidatedRegions TopLevelInvalidated; StoreManager::InvalidatedRegions Invalidated; const StoreRef &NewStore = Mgr.StoreMgr->invalidateRegions( - getStore(), Values, E, Count, LCtx, Call, *IS, *ITraits, + getStore(), Values, S, Count, LCtx, Call, *IS, *ITraits, &TopLevelInvalidated, &Invalidated); ProgramStateRef NewState = makeWithStore(NewStore); diff --git a/clang/lib/StaticAnalyzer/Core/RegionStore.cpp b/clang/lib/StaticAnalyzer/Core/RegionStore.cpp index c257a87dff385..674099dd7e1f0 100644 --- a/clang/lib/StaticAnalyzer/Core/RegionStore.cpp +++ b/clang/lib/StaticAnalyzer/Core/RegionStore.cpp @@ -405,19 +405,15 @@ class RegionStoreManager : public StoreManager { //===-------------------------------------------------------------------===// // 
Binding values to regions. //===-------------------------------------------------------------------===// - RegionBindingsRef invalidateGlobalRegion(MemRegion::Kind K, - const Expr *Ex, + RegionBindingsRef invalidateGlobalRegion(MemRegion::Kind K, const Stmt *S, unsigned Count, const LocationContext *LCtx, RegionBindingsRef B, InvalidatedRegions *Invalidated); - StoreRef invalidateRegions(Store store, - ArrayRef Values, - const Expr *E, unsigned Count, - const LocationContext *LCtx, - const CallEvent *Call, - InvalidatedSymbols &IS, + StoreRef invalidateRegions(Store store, ArrayRef Values, const Stmt *S, + unsigned Count, const LocationContext *LCtx, + const CallEvent *Call, InvalidatedSymbols &IS, RegionAndSymbolInvalidationTraits &ITraits, InvalidatedRegions *Invalidated, InvalidatedRegions *InvalidatedTopLevel) override; @@ -975,7 +971,7 @@ RegionStoreManager::removeSubRegionBindings(RegionBindingsConstRef B, namespace { class InvalidateRegionsWorker : public ClusterAnalysis { - const Expr *Ex; + const Stmt *S; unsigned Count; const LocationContext *LCtx; InvalidatedSymbols &IS; @@ -983,18 +979,15 @@ class InvalidateRegionsWorker : public ClusterAnalysis StoreManager::InvalidatedRegions *Regions; GlobalsFilterKind GlobalsFilter; public: - InvalidateRegionsWorker(RegionStoreManager &rm, - ProgramStateManager &stateMgr, - RegionBindingsRef b, - const Expr *ex, unsigned count, - const LocationContext *lctx, - InvalidatedSymbols &is, + InvalidateRegionsWorker(RegionStoreManager &rm, ProgramStateManager &stateMgr, + RegionBindingsRef b, const Stmt *S, unsigned count, + const LocationContext *lctx, InvalidatedSymbols &is, RegionAndSymbolInvalidationTraits &ITraitsIn, StoreManager::InvalidatedRegions *r, GlobalsFilterKind GFK) - : ClusterAnalysis(rm, stateMgr, b), - Ex(ex), Count(count), LCtx(lctx), IS(is), ITraits(ITraitsIn), Regions(r), - GlobalsFilter(GFK) {} + : ClusterAnalysis(rm, stateMgr, b), S(S), + Count(count), LCtx(lctx), IS(is), ITraits(ITraitsIn), Regions(r), + GlobalsFilter(GFK) {} void VisitCluster(const MemRegion *baseR, const ClusterBindings *C); void VisitBinding(SVal V); @@ -1127,7 +1120,7 @@ void InvalidateRegionsWorker::VisitCluster(const MemRegion *baseR, // Invalidate the region by setting its default value to // conjured symbol. The type of the symbol is irrelevant. DefinedOrUnknownSVal V = - svalBuilder.conjureSymbolVal(baseR, Ex, LCtx, Ctx.IntTy, Count); + svalBuilder.conjureSymbolVal(baseR, S, LCtx, Ctx.IntTy, Count); B = B.addBinding(baseR, BindingKey::Default, V); return; } @@ -1148,8 +1141,8 @@ void InvalidateRegionsWorker::VisitCluster(const MemRegion *baseR, if (T->isRecordType()) { // Invalidate the region by setting its default value to // conjured symbol. The type of the symbol is irrelevant. - DefinedOrUnknownSVal V = svalBuilder.conjureSymbolVal(baseR, Ex, LCtx, - Ctx.IntTy, Count); + DefinedOrUnknownSVal V = + svalBuilder.conjureSymbolVal(baseR, S, LCtx, Ctx.IntTy, Count); B = B.addBinding(baseR, BindingKey::Default, V); return; } @@ -1216,15 +1209,14 @@ void InvalidateRegionsWorker::VisitCluster(const MemRegion *baseR, } conjure_default: // Set the default value of the array to conjured symbol. 
- DefinedOrUnknownSVal V = - svalBuilder.conjureSymbolVal(baseR, Ex, LCtx, - AT->getElementType(), Count); - B = B.addBinding(baseR, BindingKey::Default, V); - return; + DefinedOrUnknownSVal V = svalBuilder.conjureSymbolVal( + baseR, S, LCtx, AT->getElementType(), Count); + B = B.addBinding(baseR, BindingKey::Default, V); + return; } - DefinedOrUnknownSVal V = svalBuilder.conjureSymbolVal(baseR, Ex, LCtx, - T,Count); + DefinedOrUnknownSVal V = + svalBuilder.conjureSymbolVal(baseR, S, LCtx, T, Count); assert(SymbolManager::canSymbolicate(T) || V.isUnknown()); B = B.addBinding(baseR, BindingKey::Direct, V); } @@ -1252,19 +1244,16 @@ bool InvalidateRegionsWorker::includeEntireMemorySpace(const MemRegion *Base) { RegionAndSymbolInvalidationTraits::TK_EntireMemSpace); } -RegionBindingsRef -RegionStoreManager::invalidateGlobalRegion(MemRegion::Kind K, - const Expr *Ex, - unsigned Count, - const LocationContext *LCtx, - RegionBindingsRef B, - InvalidatedRegions *Invalidated) { +RegionBindingsRef RegionStoreManager::invalidateGlobalRegion( + MemRegion::Kind K, const Stmt *S, unsigned Count, + const LocationContext *LCtx, RegionBindingsRef B, + InvalidatedRegions *Invalidated) { // Bind the globals memory space to a new symbol that we will use to derive // the bindings for all globals. const GlobalsSpaceRegion *GS = MRMgr.getGlobalsRegion(K); - SVal V = svalBuilder.conjureSymbolVal(/* symbolTag = */ (const void*) GS, Ex, LCtx, - /* type does not matter */ Ctx.IntTy, - Count); + SVal V = + svalBuilder.conjureSymbolVal(/* symbolTag = */ (const void *)GS, S, LCtx, + /* type does not matter */ Ctx.IntTy, Count); B = B.removeBinding(GS) .addBinding(BindingKey::Make(GS, BindingKey::Default), V); @@ -1298,16 +1287,11 @@ void RegionStoreManager::populateWorkList(InvalidateRegionsWorker &W, } } -StoreRef -RegionStoreManager::invalidateRegions(Store store, - ArrayRef Values, - const Expr *Ex, unsigned Count, - const LocationContext *LCtx, - const CallEvent *Call, - InvalidatedSymbols &IS, - RegionAndSymbolInvalidationTraits &ITraits, - InvalidatedRegions *TopLevelRegions, - InvalidatedRegions *Invalidated) { +StoreRef RegionStoreManager::invalidateRegions( + Store store, ArrayRef Values, const Stmt *S, unsigned Count, + const LocationContext *LCtx, const CallEvent *Call, InvalidatedSymbols &IS, + RegionAndSymbolInvalidationTraits &ITraits, + InvalidatedRegions *TopLevelRegions, InvalidatedRegions *Invalidated) { GlobalsFilterKind GlobalsFilter; if (Call) { if (Call->isInSystemHeader()) @@ -1319,7 +1303,7 @@ RegionStoreManager::invalidateRegions(Store store, } RegionBindingsRef B = getRegionBindings(store); - InvalidateRegionsWorker W(*this, StateMgr, B, Ex, Count, LCtx, IS, ITraits, + InvalidateRegionsWorker W(*this, StateMgr, B, S, Count, LCtx, IS, ITraits, Invalidated, GlobalsFilter); // Scan the bindings and generate the clusters. @@ -1339,12 +1323,12 @@ RegionStoreManager::invalidateRegions(Store store, // TODO: This could possibly be more precise with modules. 
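  // Note the deliberate [[fallthrough]]s below: GFK_All invalidates the
  // internal globals and then the system globals as well, GFK_SystemOnly
  // starts at the second case, and GFK_None invalidates neither.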
switch (GlobalsFilter) { case GFK_All: - B = invalidateGlobalRegion(MemRegion::GlobalInternalSpaceRegionKind, - Ex, Count, LCtx, B, Invalidated); + B = invalidateGlobalRegion(MemRegion::GlobalInternalSpaceRegionKind, S, + Count, LCtx, B, Invalidated); [[fallthrough]]; case GFK_SystemOnly: - B = invalidateGlobalRegion(MemRegion::GlobalSystemSpaceRegionKind, - Ex, Count, LCtx, B, Invalidated); + B = invalidateGlobalRegion(MemRegion::GlobalSystemSpaceRegionKind, S, Count, + LCtx, B, Invalidated); [[fallthrough]]; case GFK_None: break; diff --git a/clang/lib/StaticAnalyzer/Core/SValBuilder.cpp b/clang/lib/StaticAnalyzer/Core/SValBuilder.cpp index 7eca0579143f4..cb5fcbade2cfc 100644 --- a/clang/lib/StaticAnalyzer/Core/SValBuilder.cpp +++ b/clang/lib/StaticAnalyzer/Core/SValBuilder.cpp @@ -174,7 +174,7 @@ DefinedOrUnknownSVal SValBuilder::conjureSymbolVal(const void *SymbolTag, } DefinedOrUnknownSVal SValBuilder::conjureSymbolVal(const void *symbolTag, - const Expr *expr, + const Stmt *St, const LocationContext *LCtx, QualType type, unsigned count) { @@ -184,7 +184,7 @@ DefinedOrUnknownSVal SValBuilder::conjureSymbolVal(const void *symbolTag, if (!SymbolManager::canSymbolicate(type)) return UnknownVal(); - SymbolRef sym = SymMgr.conjureSymbol(expr, LCtx, type, count, symbolTag); + SymbolRef sym = SymMgr.conjureSymbol(St, LCtx, type, count, symbolTag); if (Loc::isLocType(type)) return loc::MemRegionVal(MemMgr.getSymbolicRegion(sym)); From 3fbf6f8bb183ad8b9157e50c442479f4ca7a9b8d Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Tue, 24 Sep 2024 15:50:12 +0100 Subject: [PATCH 002/212] [LV] Remove more references of unrolled parts after 57f5d8f2fe. Continue to clean up some now stale references of unroll parts and related terminology as pointed out post-commit for 06c3a7d. --- .../Transforms/Vectorize/LoopVectorize.cpp | 6 ----- llvm/lib/Transforms/Vectorize/VPlan.cpp | 26 +++++++++---------- llvm/lib/Transforms/Vectorize/VPlan.h | 6 ++--- .../lib/Transforms/Vectorize/VPlanRecipes.cpp | 4 +-- 4 files changed, 16 insertions(+), 26 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 0566d80c1cc00..d296511b97799 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -538,12 +538,6 @@ class InnerLoopVectorizer { /// A small list of PHINodes. using PhiVector = SmallVector; - /// A type for scalarized values in the new loop. Each value from the - /// original loop, when scalarized, is represented by UF x VF scalar values - /// in the new unrolled loop, where UF is the unroll factor and VF is the - /// vectorization factor. - using ScalarParts = SmallVector, 2>; - /// Set up the values of the IVs correctly when exiting the vector loop. void fixupIVUsers(PHINode *OrigPhi, const InductionDescriptor &II, Value *VectorTripCount, Value *EndValue, diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp index ce15b2783cc45..5e4d487261c6f 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp @@ -333,10 +333,10 @@ Value *VPTransformState::get(VPValue *Def, bool NeedsScalar) { // However, if we are vectorizing, we need to construct the vector values. // If the value is known to be uniform after vectorization, we can just - // broadcast the scalar value corresponding to lane zero for each unroll - // iteration. Otherwise, we construct the vector values using - // insertelement instructions. 
Since the resulting vectors are stored in - // State, we will only generate the insertelements once. + // broadcast the scalar value corresponding to lane zero. Otherwise, we + // construct the vector values using insertelement instructions. Since the + // resulting vectors are stored in State, we will only generate the + // insertelements once. Value *VectorValue = nullptr; if (IsUniform) { VectorValue = GetBroadcastInstrs(ScalarValue); @@ -769,15 +769,15 @@ void VPRegionBlock::execute(VPTransformState *State) { // Enter replicating mode. State->Instance = VPIteration(0, 0); - assert(!State->VF.isScalable() && "VF is assumed to be non scalable."); - for (unsigned Lane = 0, VF = State->VF.getKnownMinValue(); Lane < VF; - ++Lane) { - State->Instance->Lane = VPLane(Lane, VPLane::Kind::First); - // Visit the VPBlocks connected to \p this, starting from it. - for (VPBlockBase *Block : RPOT) { - LLVM_DEBUG(dbgs() << "LV: VPBlock in RPO " << Block->getName() << '\n'); - Block->execute(State); - } + assert(!State->VF.isScalable() && "VF is assumed to be non scalable."); + for (unsigned Lane = 0, VF = State->VF.getKnownMinValue(); Lane < VF; + ++Lane) { + State->Instance->Lane = VPLane(Lane, VPLane::Kind::First); + // Visit the VPBlocks connected to \p this, starting from it. + for (VPBlockBase *Block : RPOT) { + LLVM_DEBUG(dbgs() << "LV: VPBlock in RPO " << Block->getName() << '\n'); + Block->execute(State); + } } // Exit replicating mode. diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index 0632495bc511c..c886a39aec76e 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -254,7 +254,7 @@ struct VPTransformState { DominatorTree *DT, IRBuilderBase &Builder, InnerLoopVectorizer *ILV, VPlan *Plan); - /// The chosen Vectorization and Unroll Factors of the loop being vectorized. + /// The chosen Vectorization Factor of the loop being vectorized. ElementCount VF; /// Hold the indices to generate specific scalar instructions. Null indicates @@ -1253,9 +1253,7 @@ class VPInstruction : public VPRecipeWithIRFlags, ComputeReductionResult, // Takes the VPValue to extract from as first operand and the lane or part // to extract as second operand, counting from the end starting with 1 for - // last. The second operand must be a positive constant and <= VF when - // extracting from a vector or <= UF when extracting from an unrolled - // scalar. + // last. The second operand must be a positive constant and <= VF. ExtractFromEnd, LogicalAnd, // Non-poison propagating logical And. // Add an offset in bytes (second operand) to a base pointer (first diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index 318d6a8c5b8c3..f33293e65010f 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -2490,9 +2490,7 @@ void VPInterleaveRecipe::execute(VPTransformState &State) { // If the group is reverse, adjust the index to refer to the last vector lane // instead of the first. We adjust the index from the first vector lane, // rather than directly getting the pointer for lane VF - 1, because the - // pointer operand of the interleaved access is supposed to be uniform. For - // uniform instructions, we're only required to generate a value for the - // first vector lane in each unroll iteration. + // pointer operand of the interleaved access is supposed to be uniform. 
if (Group->isReverse()) { Value *RuntimeVF = getRuntimeVF(State.Builder, State.Builder.getInt32Ty(), State.VF); From 8b5e841487976ecc4233227fdd069f5a5f4443f0 Mon Sep 17 00:00:00 2001 From: Chao Chen Date: Tue, 24 Sep 2024 10:14:13 -0500 Subject: [PATCH 003/212] [MLIR][XeGPU] Updates XeGPU TensorDescAttr and Refine Gather/Scatter definition (#109675) Bring back #109144 with fixes to VectorToXeGPU --- .../mlir/Dialect/XeGPU/IR/XeGPUAttrs.td | 64 +++++++--- .../include/mlir/Dialect/XeGPU/IR/XeGPUOps.td | 86 ++++++++++---- .../mlir/Dialect/XeGPU/IR/XeGPUTypes.td | 73 +++++++----- .../VectorToXeGPU/VectorToXeGPU.cpp | 13 +-- mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp | 46 +++++--- mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp | 109 +++++++++++++----- mlir/test/Dialect/XeGPU/XeGPUOps.mlir | 74 +++++++----- mlir/test/Dialect/XeGPU/invalid.mlir | 75 +++++++----- 8 files changed, 365 insertions(+), 175 deletions(-) diff --git a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUAttrs.td b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUAttrs.td index f3ca09a6a68ea..26eec0d4f2082 100644 --- a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUAttrs.td +++ b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUAttrs.td @@ -19,12 +19,18 @@ class XeGPUAttr traits = [], let mnemonic = attrMnemonic; } -def XeGPU_TensorDescAttr: XeGPUAttr<"TensorDesc", "tdesc_attr"> { +class XeGPU_TensorDescAttr traits = [], + string baseCppClass = "::mlir::Attribute"> + : XeGPUAttr { + let assemblyFormat = "`<` struct(params) `>`"; +} + +def XeGPU_BlockTensorDescAttr: XeGPU_TensorDescAttr<"BlockTensorDesc", "block_tdesc_attr"> { let summary = [{a composite attribute for `TensorDescType`}]; - let description = [{`TensorDescAttr` (or `tdesc_attr`) is a composite + let description = [{`BlockTensorDesc` (or `block_tdesc_attr`) is a composite attribute defined for `TensorDescType` for describing following properties of a `TensorDesc`. - 1. `memory_scope`: It describes where the data block described by the + 1. `memory_space`: It describes where the data block described by the TensorDesc is located, `Global` device memory or `Shared` local memory. It is default to `Global`. 2. `array_length`: It describes how many horizontally consecutive blocks @@ -33,43 +39,63 @@ def XeGPU_TensorDescAttr: XeGPUAttr<"TensorDesc", "tdesc_attr"> { 8x32. Its default value is 1. 3. `boundary_check`: It is used to indicates the hardware whether to do out-of-boundary check. The default value is true. - 4. `scattered`: It is used to differenciate TensorDescs created from - `create_nd_tdesc` vs from `create_tdesc`. 
}];

   let parameters = (ins
-    OptionalParameter<"MemoryScopeAttr">: $memory_scope,
+    OptionalParameter<"MemorySpaceAttr">: $memory_space,
     OptionalParameter<"IntegerAttr", "1">: $array_length,
-    OptionalParameter<"BoolAttr", "true">: $boundary_check,
-    OptionalParameter<"BoolAttr", "false">: $scattered
+    OptionalParameter<"BoolAttr", "true">: $boundary_check
   );

   let builders = [
     AttrBuilder<(ins
-      CArg<"xegpu::MemoryScope", "xegpu::MemoryScope::Global">:$memory_scope,
+      CArg<"xegpu::MemorySpace", "xegpu::MemorySpace::Global">:$memory_space,
       CArg<"int", "1">:$array_length,
-      CArg<"bool", "true">: $boundary_check,
-      CArg<"bool", "false">: $scattered
+      CArg<"bool", "true">: $boundary_check
     )>
   ];
-
-  let assemblyFormat = "`<` struct(params) `>`";
 }

+def XeGPU_ScatterTensorDescAttr: XeGPU_TensorDescAttr<"ScatterTensorDesc", "scatter_tdesc_attr"> {
+  let summary = [{a composite attribute for `TensorDescType`}];
+  let description = [{`ScatterTensorDesc` (or `scatter_tdesc_attr`) is a composite
+    attribute defined for `TensorDescType` for describing following
+    properties of a `TensorDesc`.
+    1. `memory_space`: It describes where the data block described by the
+      TensorDesc is located, `Global` device memory or `Shared` local memory.
+      It is default to `Global`.
+    2. `chunk_size`: indicates the number of contiguous elements accessed for each
+      offset, default is 1. It is used with `scattered` attr only.
+  }];
+
+  let parameters = (ins
+    OptionalParameter<"MemorySpaceAttr">: $memory_space,
+    OptionalParameter<"IntegerAttr", "1">: $chunk_size
+  );
+
+  let builders = [
+    AttrBuilder<(ins
+      CArg<"xegpu::MemorySpace", "xegpu::MemorySpace::Global">:$memory_space,
+      CArg<"int", "1">: $chunk_size
+    )>
+  ];
+}
+
 //===----------------------------------------------------------------------===//
 // XeGPU Memory Scope Enums.
//===----------------------------------------------------------------------===// -def XeGPU_MemoryScopeGlobal: I32EnumAttrCase<"Global", 0, "global">; -def XeGPU_MemoryScopeShared: I32EnumAttrCase<"SLM", 1, "slm">; -def XeGPU_MemoryScope: I32EnumAttr<"MemoryScope", +def XeGPU_MemorySpaceGlobal: I32EnumAttrCase<"Global", 0, "global">; +def XeGPU_MemorySpaceShared: I32EnumAttrCase<"SLM", 3, "slm">; +def XeGPU_MemorySpace: I32EnumAttr<"MemorySpace", "The address space of the memory the tensor descritor is created for", - [XeGPU_MemoryScopeGlobal, XeGPU_MemoryScopeShared]> { + [XeGPU_MemorySpaceGlobal, XeGPU_MemorySpaceShared]> { let genSpecializedAttr = 0; let cppNamespace = "::mlir::xegpu"; } -def XeGPU_MemoryScopeAttr: - EnumAttr { +def XeGPU_MemorySpaceAttr: + EnumAttr { let summary = [{Describe the location of data described by a `TensorDesc`: Global device memory (`Global`) or Shared local memory (`SLM`).}]; let assemblyFormat = "$value"; @@ -116,4 +142,4 @@ def XeGPU_FenceScopeAttr: let assemblyFormat = "$value"; } -#endif // MLIR_DIALECT_XEGPU_IR_XEGPUATTRS_TD \ No newline at end of file +#endif // MLIR_DIALECT_XEGPU_IR_XEGPUATTRS_TD diff --git a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td index c32c7541c3979..e24a056de2caf 100644 --- a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td +++ b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td @@ -218,6 +218,23 @@ def XeGPU_CreateNdDescOp: XeGPU_Op<"create_nd_tdesc", [Pure, ViewLikeOpInterface static unsigned getOffsetSizeAndStrideStartOperandIndex() { return 1; } mlir::Value getViewSource() { return getSource(); } + + unsigned getSourceMemorySpace() { + auto srcTy = getSourceType(); + if (auto memrefTy = llvm::dyn_cast(srcTy)) { + auto attr = memrefTy.getMemorySpace(); + if (attr) { + if (auto intAttr = llvm::dyn_cast(attr)) { + return static_cast(intAttr.getInt()); + } + if (auto memSpaceAttr = llvm::dyn_cast(attr)) + return static_cast(memSpaceAttr.getValue()); + } + } + // take global as default memory scope. + return static_cast(MemorySpace::Global); + } + }]; } @@ -411,8 +428,10 @@ def XeGPU_CreateDescOp: XeGPU_Op<"create_tdesc", [Pure, ViewLikeOpInterface]> { is fixed to the hardware supportted subgroup size, e.g., 16 on PVC, implying each element in the array corresponds to a work-item (SIMT lane) in the subgroup. - * chunk_size: [optional attribute] indicates number of continious - elements accessed for each offset, default is 1. + + The first dimension of the result TensorDesc corresponds to work-items, so it should + match the dimension of offsets. It may also has a second dimension corresponding to + the chunk_size if the chunk size is larger than 1. Example 1. It assumes subgroup size is 4, and accesses a[0], a[16], a[32], a[64] ```mlir @@ -424,29 +443,22 @@ def XeGPU_CreateDescOp: XeGPU_Op<"create_tdesc", [Pure, ViewLikeOpInterface]> { It will access totally 32 data elements: a[0:7], a[16:23], a[32:39], a[64:71] ```mlir %0 = memref.alloc() : memref<1024xf32> - %1 = xegpu.create_tdesc %0[0, 16, 32, 64] {chunk_size = 8}: memref<1024xf32> -> TensorDesc<4x8xf32> + %1 = xegpu.create_tdesc %0[0, 16, 32, 64] : memref<1024xf32> -> TensorDesc<4x8xf32, chunk_size = 8> ``` Example 3. It is similar to Example 2, but there is some overlaps among workitems. 
It accesses: a[0:7], a[4:11], a[8:15], a[12:19]
   ```mlir
   %0 = memref.alloc() : memref<1024xf32>
-  %1 = xegpu.create_tdesc %0[0, 4, 8, 12] {chunk_size = 8}: memref<1024xf32> -> TensorDesc<4x8xf32>
+  %1 = xegpu.create_tdesc %0[0, 4, 8, 12] : memref<1024xf32> -> TensorDesc<4x8xf32, chunk_size = 8>
   ```
   }];

   let arguments = (ins XeGPU_BaseAddrType: $source,
                        Variadic: $offsets,
-                       DenseI64ArrayAttr: $const_offsets,
-                       DefaultValuedAttr: $chunk_size);
+                       DenseI64ArrayAttr: $const_offsets);
   let results = (outs XeGPU_TensorDesc:$TensorDesc);

-  let builders = [
-    OpBuilder<(ins "xegpu::TensorDescType": $TensorDesc, "Value": $source,
-                   "llvm::ArrayRef": $offsets,
-                   CArg<"uint32_t", "1"> : $chunk_size)>,
-  ];
-
   let assemblyFormat = [{
     $source
     custom($offsets, $const_offsets)
@@ -473,6 +485,22 @@ def XeGPU_CreateDescOp: XeGPU_Op<"create_tdesc", [Pure, ViewLikeOpInterface]> {
       assert(idx < getNumOffsets() && "Invalid out of bound access.");
       return getMixedOffsets()[idx];
     }
+
+    unsigned getSourceMemorySpace() {
+      auto srcTy = getSource().getType();
+      if (auto memrefTy = llvm::dyn_cast(srcTy)) {
+        auto attr = memrefTy.getMemorySpace();
+        if (attr) {
+          if (auto intAttr = llvm::dyn_cast(attr))
+            return static_cast(intAttr.getInt());
+          if (auto memSpaceAttr = llvm::dyn_cast(attr))
+            return static_cast(memSpaceAttr.getValue());
+        }
+      }
+      // take global as default memory scope.
+      return static_cast(MemorySpace::Global);
+    }
+
   }];

   let hasVerifier = 1;
@@ -520,28 +548,31 @@ def XeGPU_LoadGatherOp : XeGPU_Op<"load", [AllRanksMatch<["value", "TensorDesc"]
   let description = [{ It (aka. load) load data per each work-item. The output
    describes the data being loaded at the subgroup level, so its size is
-   consistent with the number of work-items in a subgroup. When `chunk_size_per_lane`
-   attribute is larger than 1 in TensorDesc, the output vector will be 2D vector,
-   with dim-1 correspoding to the chunk size.
+   consistent with the number of work-items in a subgroup. When the chunk size
+   is larger than 2, the output vector is a 2D vector, with dim-1 corresponding
+   to work-items, and dim-0 corresponding to the chunk_size loaded by each work-item.
+   Specifically, there is a transpose effect on the result (as compared to the TensorDesc)
+   due to the hardware implementation. Therefore, a transpose attribute is introduced
+   on purpose, making sure users are aware of this implicit transformation.

    The mask operand masks out memory access so that it is safe to pass out-of-boundary
    addresses/offsets as long as they are masked. It applies to slots of SIMD lanes.
Example:
   ```mlir
-    %2 = xegpu.load %1, %0 {transpose = [1, 0],
+    %2 = xegpu.load %1, %0 {transpose,
                     l1_hint = #xegpu.cache_hint,
                     l2_hint = #xegpu.cache_hint,
                     l3_hint = #xegpu.cache_hint}
-          : !xegpu.tensor_desc<16xf32, #xegpu.tdesc_attr>, vector<16xi1>
-            -> vector<16xf32>
+          : !xegpu.tensor_desc<16xf32, #xegpu.scatter_tdesc_attr>,
+            vector<16xi1> -> vector<16xf32>
   ```
   }];

   let arguments = (ins XeGPU_TensorDesc: $TensorDesc,
                        XeGPU_MaskType: $mask,
-                       OptionalAttr: $transpose,
+                       OptionalAttr: $transpose,
                        OptionalAttr: $l1_hint,
                        OptionalAttr: $l2_hint,
                        OptionalAttr: $l3_hint);
@@ -573,11 +604,15 @@ def XeGPU_LoadGatherOp : XeGPU_Op<"load", [AllRanksMatch<["value", "TensorDesc"]
   let hasVerifier = 1;
 }

-def XeGPU_StoreScatterOp : XeGPU_Op<"store", [AllShapesMatch<["value", "TensorDesc"]>,
-                                        AllElementTypesMatch<["value", "TensorDesc"]>]> {
+def XeGPU_StoreScatterOp : XeGPU_Op<"store", [AllElementCountsMatch<["value", "TensorDesc"]>,
+                                        AllElementTypesMatch<["value", "TensorDesc"]>]> {
   let summary = "store data to scattered memory locations.";
-  let description = [{ It (aka. store) stores data to scattered memory locations.
-  It has similar semantic to `load_gather`.
+  let description = [{ It (aka. store) stores data to scattered memory locations. The value is
+  typically a 1D vector. But when the chunk size of the TensorDesc is larger than 1, it will be
+  a 2D vector instead. For the latter case, dim-1 of the value corresponds to the SIMD lanes
+  and the dim-0 of the value corresponds to the chunk_size stored per lane. So `store_scatter`
+  has a transpose effect, which is similar to `load_gather`. Therefore, a transpose attribute is
+  introduced on purpose, making sure users are aware of this implicit transformation.

   Example:
   ```mlir
     %3 = xegpu.store %0, %1, %2 {l1_hint = #xegpu.cache_hint,
                                  l2_hint = #xegpu.cache_hint,
                                  l3_hint = #xegpu.cache_hint}
           : vector<16xf32>, !xegpu.tensor_desc<16xf32, #xegpu.tdesc_attr>, vector<16xi1>
   ```
   }];

   let arguments = (ins
     XeGPU_ValueType: $value,
     XeGPU_TensorDesc: $TensorDesc,
     XeGPU_MaskType: $mask,
+    OptionalAttr: $transpose,
     OptionalAttr: $l1_hint,
     OptionalAttr: $l2_hint,
     OptionalAttr: $l3_hint);
@@ -723,7 +759,7 @@ def XeGPU_DpasOp : XeGPU_Op<"dpas", [Pure, AllElementTypesMatch<["lhs", "rhs"]>]

 def XeGPU_AtomicRMWOp: XeGPU_Op<"atomic_rmw", [Pure,
       AllElementTypesMatch<["tensorDesc", "value", "result"]>,
-      AllShapesMatch<["tensorDesc", "mask", "value", "result"]>]> {
+      AllShapesMatch<["tensorDesc", "value", "result"]>]> {
   let summary = "Atomic ready-modify-write operation on the TensorDesc. ";

   let description = [{
@@ -808,7 +844,7 @@ def XeGPU_FenceOp: XeGPU_Op<"fence", []> {
     2. `Fence_scope` describes the scope of fence. "Workgroup" means that the scope would be
        within each workgroup. "GPU" means the scope would be across workgroups within the GPU.
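    Example (a minimal usage sketch; the `workgroup` mnemonic for `fence_scope`
    is assumed here, as its enum cases are not shown in this patch):
    ```mlir
    xegpu.fence memory_kind = global, fence_scope = workgroup
    ```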
}]; - let arguments = (ins XeGPU_MemoryScopeAttr: $memory_kind, + let arguments = (ins XeGPU_MemorySpaceAttr: $memory_kind, XeGPU_FenceScopeAttr: $fence_scope); let assemblyFormat = [{`memory_kind` `=` `` $memory_kind `,` `fence_scope` `=` `` $fence_scope attr-dict}]; let extraClassDeclaration = extraBaseClassDeclaration; diff --git a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUTypes.td b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUTypes.td index 9f101a71697b5..0ce1211664b5b 100644 --- a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUTypes.td +++ b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUTypes.td @@ -48,7 +48,7 @@ def XeGPU_TensorDesc: XeGPUTypeDef<"TensorDesc", "tensor_desc", Similar to the builtin tensor, it also provides an optinal attribute to encoding the following information via the TensorDescAttr object: - * memory_scope (xegpu::MemoryScope): [optional] where the data is located, + * memory_space (xegpu::MemorySpace): [optional] where the data is located, global memory or shared memory. It is default to Global. * array_length (int): [optional] The number of contiguous blocks with size as `shape`, that will be loaded by block load at a time. It is default to 1. @@ -63,7 +63,7 @@ def XeGPU_TensorDesc: XeGPUTypeDef<"TensorDesc", "tensor_desc", element-type ::= float-type | integer-type | index-type dim-list := (static-dim-list `x`)? static-dim-list ::= decimal-literal `x` decimal-literal - attr-list = (, memory_scope = value)? (, arr_len = value)? (, boundary_check = value)? (, scattered = value)? + attr-list = (, memory_space = value)? (, arr_len = value)? (, boundary_check = value)? (, scattered = value)? ``` Examples: @@ -76,7 +76,7 @@ def XeGPU_TensorDesc: XeGPUTypeDef<"TensorDesc", "tensor_desc", xegpu.tensor_desc<8x16xf32> // A TensorDesc with 8x16 f32 elements for a memory region in shared memory space. 
- xegpu.tensor_desc<8x16xf32, #xegpu.tdesc_attr> + xegpu.tensor_desc<8x16xf32, #xegpu.tdesc_attr> ``` }]; @@ -88,11 +88,14 @@ def XeGPU_TensorDesc: XeGPUTypeDef<"TensorDesc", "tensor_desc", TypeBuilderWithInferredContext<(ins "llvm::ArrayRef": $shape, "mlir::Type": $elementType, - CArg<"bool", "false">: $scattered, CArg<"int", "1">: $array_length, - CArg<"xegpu::MemoryScope", "xegpu::MemoryScope::Global">:$memory_scope, - CArg<"bool", "true">: $boundary_check - )> + CArg<"bool", "true">: $boundary_check, + CArg<"xegpu::MemorySpace", "xegpu::MemorySpace::Global">:$memory_space)>, + TypeBuilderWithInferredContext<(ins + "llvm::ArrayRef": $shape, + "mlir::Type": $elementType, + CArg<"int", "1">: $chunk_size, + CArg<"xegpu::MemorySpace", "xegpu::MemorySpace::Global">:$memory_space)> ]; let extraClassDeclaration = [{ @@ -110,40 +113,58 @@ def XeGPU_TensorDesc: XeGPUTypeDef<"TensorDesc", "tensor_desc", return llvm::cast(cloneWith(getShape(), elementType)); } - TensorDescAttr getEncodingAsTensorDescAttr() const { - return llvm::dyn_cast_if_present(getEncoding()); + BlockTensorDescAttr getEncodingAsBlockTensorDescAttr() const { + return llvm::dyn_cast_if_present(getEncoding()); } - xegpu::MemoryScope getMemoryScope() const { - auto attr = getEncodingAsTensorDescAttr(); - if (attr && attr.getMemoryScope()) - return attr.getMemoryScope().getValue(); + ScatterTensorDescAttr getEncodingAsScatterTensorDescAttr() const { + return llvm::dyn_cast_if_present(getEncoding()); + } + + xegpu::MemorySpace getMemorySpace() const { + auto block_attr = getEncodingAsBlockTensorDescAttr(); + if (block_attr && block_attr.getMemorySpace()) + return block_attr.getMemorySpace().getValue(); + + auto scatter_attr = getEncodingAsScatterTensorDescAttr(); + if (scatter_attr && scatter_attr.getMemorySpace()) + return scatter_attr.getMemorySpace().getValue(); + // return default value - return MemoryScope::Global; + return MemorySpace::Global; } int getArrayLength() { - auto attr = getEncodingAsTensorDescAttr(); - if (attr && attr.getArrayLength()) - return attr.getArrayLength().getInt(); + auto attr = getEncoding(); + auto block_attr = mlir::dyn_cast_if_present(attr); + assert((!attr || block_attr) && "invalid on non BlockTensorDescAttr."); + if (block_attr && block_attr.getArrayLength()) + return block_attr.getArrayLength().getInt(); // return default value return 1; } bool getBoundaryCheck() { - auto attr = getEncodingAsTensorDescAttr(); - if (attr && attr.getBoundaryCheck()) - return attr.getBoundaryCheck().getValue(); + auto attr = getEncoding(); + auto block_attr = mlir::dyn_cast_if_present(attr); + assert((!attr || block_attr) && "invalid on non BlockTensorDescAttr."); + if (block_attr && block_attr.getBoundaryCheck()) + return block_attr.getBoundaryCheck().getValue(); // return default value return true; } - bool getScattered() { - auto attr = getEncodingAsTensorDescAttr(); - if (attr && attr.getScattered()) - return attr.getScattered().getValue(); - // return default value - return false; + bool isScattered() { + return bool(getEncodingAsScatterTensorDescAttr()); + } + + int getChunkSize() { + auto attr = getEncoding(); + auto scatter_attr = mlir::dyn_cast_if_present(attr); + assert((!attr || scatter_attr) && "invalid on non ScatterTensorDescAttr."); + if (scatter_attr && scatter_attr.getChunkSize()) + return scatter_attr.getChunkSize().getInt(); + return 1; } }]; diff --git a/mlir/lib/Conversion/VectorToXeGPU/VectorToXeGPU.cpp b/mlir/lib/Conversion/VectorToXeGPU/VectorToXeGPU.cpp index be1581d619a8b..fa03442765539 
100644 --- a/mlir/lib/Conversion/VectorToXeGPU/VectorToXeGPU.cpp +++ b/mlir/lib/Conversion/VectorToXeGPU/VectorToXeGPU.cpp @@ -168,9 +168,8 @@ struct TransferReadLowering : public OpRewritePattern { if (isTransposeLoad) std::reverse(descShape.begin(), descShape.end()); auto descType = xegpu::TensorDescType::get( - descShape, elementType, /*scattered=*/false, /*array_length=*/1, - xegpu::MemoryScope::Global, - /*boundary_check=*/isOutOfBounds); + descShape, elementType, /*array_length=*/1, + /*boundary_check=*/isOutOfBounds, xegpu::MemorySpace::Global); xegpu::CreateNdDescOp ndDesc = createNdDescriptor(rewriter, loc, descType, @@ -212,10 +211,10 @@ struct TransferWriteLowering return rewriter.notifyMatchFailure(writeOp, "Expects identity map"); VectorType vecTy = writeOp.getVectorType(); - auto descType = xegpu::TensorDescType::get( - vecTy.getShape(), vecTy.getElementType(), - /*scattered=*/false, /*array_length=*/1, xegpu::MemoryScope::Global, - /*boundary_check=*/false); + auto descType = + xegpu::TensorDescType::get(vecTy.getShape(), vecTy.getElementType(), + /*array_length=*/1, /*boundary_check=*/false, + xegpu::MemorySpace::Global); xegpu::CreateNdDescOp ndDesc = createNdDescriptor( rewriter, loc, descType, dyn_cast>(writeOp.getSource()), diff --git a/mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp b/mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp index 24719fe748fe4..1dfbaed454c19 100644 --- a/mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp +++ b/mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp @@ -30,23 +30,35 @@ void XeGPUDialect::initialize() { } //===----------------------------------------------------------------------===// -// XeGPU_TensorDescAttr +// XeGPU_BlockTensorDescAttr //===----------------------------------------------------------------------===// -TensorDescAttr TensorDescAttr::get(mlir::MLIRContext *context, - xegpu::MemoryScope memory_scope, - int array_length, bool boundary_check, - bool scattered) { - auto scopeAttr = MemoryScopeAttr::get(context, memory_scope); +BlockTensorDescAttr BlockTensorDescAttr::get(mlir::MLIRContext *context, + xegpu::MemorySpace memory_space, + int array_length, + bool boundary_check) { + auto scopeAttr = MemorySpaceAttr::get(context, memory_space); auto lengthAttr = IntegerAttr::get(IntegerType::get(context, 64), array_length); auto boundaryAttr = BoolAttr::get(context, boundary_check); - auto scatteredAttr = BoolAttr::get(context, scattered); - return Base::get(context, scopeAttr, lengthAttr, boundaryAttr, scatteredAttr); + return Base::get(context, scopeAttr, lengthAttr, boundaryAttr); +} + +//===----------------------------------------------------------------------===// +// XeGPU_ScatterTensorDescAttr +//===----------------------------------------------------------------------===// +ScatterTensorDescAttr +ScatterTensorDescAttr::get(mlir::MLIRContext *context, + xegpu::MemorySpace memory_space, int chunk_size) { + auto scopeAttr = MemorySpaceAttr::get(context, memory_space); + auto chunkSizeAttr = + IntegerAttr::get(IntegerType::get(context, 64), chunk_size); + return Base::get(context, scopeAttr, chunkSizeAttr); } //===----------------------------------------------------------------------===// // XeGPU_TensorDescType //===----------------------------------------------------------------------===// + mlir::Type TensorDescType::parse(::mlir::AsmParser &parser) { llvm::SmallVector shape; mlir::Type elementType; @@ -108,12 +120,20 @@ void TensorDescType::print(::mlir::AsmPrinter &printer) const { } TensorDescType TensorDescType::get(llvm::ArrayRef shape, - 
mlir::Type elementType, bool scattered, - int array_length, MemoryScope memory_scope, - bool boundary_check) { + mlir::Type elementType, int array_length, + bool boundary_check, + MemorySpace memory_space) { + auto context = elementType.getContext(); + auto attr = BlockTensorDescAttr::get(context, memory_space, array_length, + boundary_check); + return Base::get(context, shape, elementType, attr); +} + +TensorDescType TensorDescType::get(llvm::ArrayRef shape, + mlir::Type elementType, int chunk_size, + MemorySpace memory_space) { auto context = elementType.getContext(); - auto attr = TensorDescAttr::get(context, memory_scope, array_length, - boundary_check, scattered); + auto attr = ScatterTensorDescAttr::get(context, memory_space, chunk_size); return Base::get(context, shape, elementType, attr); } diff --git a/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp b/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp index 9c517337a3aa5..1a7a6b3478409 100644 --- a/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp +++ b/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp @@ -124,6 +124,17 @@ LogicalResult CreateNdDescOp::verify() { bool invalidRank = false; bool invalidElemTy = false; + // Memory space of created TensorDesc should match with the source. + // Both source and TensorDesc are considered for global memory by default, + // if the memory scope attr is not specified. If source is an integer, + // it is considered as ptr to global memory. + auto srcMemorySpace = getSourceMemorySpace(); + auto tdescMemorySpace = static_cast(getType().getMemorySpace()); + if (srcMemorySpace != tdescMemorySpace) + return emitOpError("Memory space mismatch.") + << " Source: " << srcMemorySpace + << ", TensorDesc: " << tdescMemorySpace; + // check source type matches the rank if it is a memref. // It also should have the same ElementType as TensorDesc. auto memrefTy = dyn_cast(getSourceType()); @@ -152,9 +163,13 @@ LogicalResult CreateNdDescOp::verify() { return emitOpError("TensorDesc should have the same element " "type with the source if it is a memref.\n"); - if (getType().getScattered()) + if (getType().isScattered()) return emitOpError("Expects a non-scattered TensorDesc.\n"); + if (getType().getRank() == 2 && + tdescMemorySpace == static_cast(MemorySpace::SLM)) + return emitOpError("SLM is not supported for 2D Block TensorDesc.\n"); + return success(); } @@ -163,7 +178,7 @@ LogicalResult CreateNdDescOp::verify() { //===----------------------------------------------------------------------===// LogicalResult PrefetchNdOp::verify() { auto tdescTy = getTensorDescType(); - if (tdescTy.getScattered()) + if (tdescTy.isScattered()) return emitOpError("Expects a non-scattered TensorDesc.\n"); if (!isReadHintOrNone(getL1HintAttr())) @@ -188,7 +203,7 @@ LogicalResult LoadNdOp::verify() { if (tdescTy.getRank() > 2) return emitOpError("Expecting a 1D/2D TensorDesc.\n"); - if (tdescTy.getScattered()) + if (tdescTy.isScattered()) return emitOpError("Expects a non-scattered TensorDesc.\n"); if (!valueTy) @@ -228,8 +243,8 @@ LogicalResult LoadNdOp::verify() { tdescShape[axis] /= vnni_factor; tdescShape.push_back(vnni_factor); } else { - return emitWarning("Invalid Packed Attr. It is ignored (available for 2D " - "TensorDesc only)."); + emitWarning("Invalid Packed Attr. 
It is ignored (available for 2D "
+                  "TensorDesc only).");
     }
   }

@@ -256,7 +271,7 @@ LogicalResult StoreNdOp::verify() {
   if (dstTy.getRank() > 2)
     return emitOpError("Expecting a 1D/2D TensorDesc.\n");

-  if (dstTy.getScattered())
+  if (dstTy.isScattered())
     return emitOpError("Expects a non-scattered TensorDesc.\n");

   if (!valTy)
@@ -279,7 +294,7 @@
 //===----------------------------------------------------------------------===//
 LogicalResult UpdateNdOffsetOp::verify() {
   auto ty = getTensorDescType();
-  if (ty.getScattered())
+  if (ty.isScattered())
     return emitOpError("Expects a non-scattered TensorDesc.\n");

   // number of offsets specified must match the rank of the tensor descriptor
@@ -292,28 +307,55 @@
 //===----------------------------------------------------------------------===//
 // XeGPU_CreateDescOp
 //===----------------------------------------------------------------------===//
-void CreateDescOp::build(OpBuilder &builder, OperationState &state,
-                         TensorDescType TensorDesc, Value source,
-                         llvm::ArrayRef offsets,
-                         uint32_t chunk_size) {
-  llvm::SmallVector staticOffsets;
-  llvm::SmallVector dynamicOffsets;
-  dispatchIndexOpFoldResults(offsets, dynamicOffsets, staticOffsets);
-  build(builder, state, TensorDesc, source, dynamicOffsets, staticOffsets,
-        chunk_size);
-}

 LogicalResult CreateDescOp::verify() {
   auto tdescTy = getTensorDescType();
-  auto chunkSize = getChunkSize();

   if (getRankOf(getSource()) > 1)
     return emitOpError(
         "Expecting the source is a 1D memref or pointer (uint64_t).");

-  if (!tdescTy.getScattered())
+  if (!tdescTy.isScattered())
     return emitOpError("Expects a scattered TensorDesc.\n");

+  // Memory space of created TensorDesc should match with the source.
+  // Both source and TensorDesc are considered for global memory by default,
+  // if the memory scope attr is not specified. If source is an integer,
+  // it is considered as ptr to global memory.
+  auto srcMemorySpace = getSourceMemorySpace();
+  auto tdescMemorySpace = static_cast(tdescTy.getMemorySpace());
+  if (srcMemorySpace != tdescMemorySpace)
+    return emitOpError("Memory space mismatch.")
+           << " Source: " << srcMemorySpace
+           << ", TensorDesc: " << tdescMemorySpace;
+
+  auto chunkSize = tdescTy.getChunkSize();
+
+  // check chunk_size
+  llvm::SmallVector supportedChunkSizes = {1, 2, 3, 4, 8,
+                                           16, 32, 64, 128, 256};
+  if (!llvm::is_contained(supportedChunkSizes, chunkSize))
+    return emitOpError("Invalid chunk_size. Supported values are 1, 2, 3, 4, "
+                       "8, 16, 32, 64, 128, or 256.");
+
+  // check total size
+  auto elemBits = tdescTy.getElementType().getIntOrFloatBitWidth();
+  auto bitsPerLane = elemBits * chunkSize;
+  if (chunkSize > 1 && bitsPerLane % 32) {
+    // For 8-bit and 16-bit data, the hardware only supports chunk size of 1.
+    // For 32-bit data, the hardware can support a larger chunk size. So
+    // we can bitcast 8-bit/16-bit data to 32-bit data for better performance.
+    // But this requires the total size is 32 bit aligned to make the
+    // optimization work.
+    return emitOpError(
+        "access size (chunk_size * sizeof(elemTy)) should be 32-bit aligned.");
+  }
+
+  auto lscConstraints = 512 * 8; // each access is up to 512 bytes.
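+  // For example, 16 SIMD lanes with chunk_size = 8 and 32-bit elements give
+  // 16 * 8 * 32 = 4096 bits, i.e. 512 bytes, which is exactly at this limit.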
+  if (elemBits * tdescTy.getNumElements() > lscConstraints)
+    return emitOpError("total access size (simd_lanes * chunk_size * "
+                       "sizeof(elemTy)) is up to 512 bytes.");
+
   SmallVector shape({(int64_t)getNumOffsets()});
   if (chunkSize != 1)
     shape.push_back(chunkSize);
@@ -331,7 +373,7 @@
 //===----------------------------------------------------------------------===//
 LogicalResult PrefetchOp::verify() {
   auto tdescTy = getTensorDescType();
-  if (!tdescTy.getScattered())
+  if (!tdescTy.isScattered())
     return emitOpError("Expects a scattered TensorDesc.\n");

   if (!isReadHintOrNone(getL1HintAttr()))
@@ -354,7 +396,7 @@ LogicalResult LoadGatherOp::verify() {
   auto tdescTy = getTensorDescType();
   auto maskTy = getMaskType();
   auto valueTy = getValueType();

-  if (!tdescTy.getScattered())
+  if (!tdescTy.isScattered())
     return emitOpError("Expects a scattered TensorDesc.\n");

   if (!isReadHintOrNone(getL1HintAttr()))
@@ -379,12 +421,10 @@ LogicalResult LoadGatherOp::verify() {
   if (tdescShape[0] != maskShape[0])
     return emitOpError("dim-0 of the Mask and TensorDesc should be the same.");

-  if (getTransposeAttr()) {
-    auto trans = getTranspose().value();
-    if (tdescShape.size() < trans.size())
-      emitWarning("Invalid transpose attr. It is ignored.");
-    else
-      transpose(trans, tdescShape);
+  if (tdescTy.getRank() == 2) {
+    if (!getTransposeAttr())
+      return emitOpError("load_gather has to be transposed.");
+    transpose({1, 0}, tdescShape);
   }

   if (valueShape != tdescShape)
@@ -400,7 +440,7 @@
 //===----------------------------------------------------------------------===//
 LogicalResult StoreScatterOp::verify() {
   auto tdescTy = getTensorDescType();
-  if (!tdescTy.getScattered())
+  if (!tdescTy.isScattered())
     return emitOpError("Expects a scattered TensorDesc.\n");

   if (!isWriteHintOrNone(getL1HintAttr()))
@@ -413,11 +453,24 @@ LogicalResult StoreScatterOp::verify() {
     return emitOpError("invlid l3_hint: ") << getL3HintAttr();

   auto maskTy = getMaskType();
+  auto valueTy = getValueType();
   auto maskShape = getShapeOf(maskTy);
   auto tdescShape = getShapeOf(tdescTy);
+  auto valueShape = getShapeOf(valueTy);
   if (tdescShape[0] != maskShape[0])
     return emitOpError("dim-0 of the Mask and TensorDesc should be the same.");

+  if (tdescTy.getRank() == 2) {
+    if (!getTransposeAttr())
+      return emitOpError("store_scatter has to be transposed.");
+    transpose({1, 0}, tdescShape);
+  }
+
+  if (valueShape != tdescShape)
+    return emitOpError("Unexpected value shape")
+           << "(Expected shape: " << makeString(tdescShape)
+           << ", Given shape: " << makeString(valueShape) << ").\n";
+
   return success();
 }
 //===----------------------------------------------------------------------===//
diff --git a/mlir/test/Dialect/XeGPU/XeGPUOps.mlir b/mlir/test/Dialect/XeGPU/XeGPUOps.mlir
index 35d44cf56a239..c1126efb6046d 100644
--- a/mlir/test/Dialect/XeGPU/XeGPUOps.mlir
+++ b/mlir/test/Dialect/XeGPU/XeGPUOps.mlir
@@ -24,8 +24,8 @@ gpu.func @test_create_nd_tdesc_vc_2(%src: ui64, %w : index, %h : index, %x : ind

 // CHECK: gpu.func @test_create_nd_tdesc_vc_3(%[[arg0:.*]]: memref<24x32xf32>) {
 gpu.func @test_create_nd_tdesc_vc_3(%src: memref<24x32xf32>) {
-  // CHECK: %[[REG:.*]] = xegpu.create_nd_tdesc %[[arg0]][0, 0] : memref<24x32xf32> -> !xegpu.tensor_desc<24x16xf32, #xegpu.tdesc_attr
-  %1 = xegpu.create_nd_tdesc %src[0, 0] : memref<24x32xf32> -> !xegpu.tensor_desc<24x16xf32, #xegpu.tdesc_attr>
+  // CHECK: %[[REG:.*]] = xegpu.create_nd_tdesc %[[arg0]][0, 0] : memref<24x32xf32> -> !xegpu.tensor_desc<24x16xf32,
#xegpu.block_tdesc_attr + %1 = xegpu.create_nd_tdesc %src[0, 0] : memref<24x32xf32> -> !xegpu.tensor_desc<24x16xf32, #xegpu.block_tdesc_attr> gpu.return } @@ -36,6 +36,13 @@ gpu.func @test_create_nd_tdesc_vc_4(%src: memref<2x24x32xf32>) { gpu.return } +// CHECK: gpu.func @test_create_nd_tdesc_vc_5(%[[arg0:.*]]: memref<2x24x32xf32, 3>) { +gpu.func @test_create_nd_tdesc_vc_5(%src: memref<2x24x32xf32, 3>) { + // CHECK: %[[REG:.*]] = xegpu.create_nd_tdesc %arg0[0, 0, 0] : memref<2x24x32xf32, 3> -> !xegpu.tensor_desc<16xf32, #xegpu.block_tdesc_attr> + %1 = xegpu.create_nd_tdesc %src[0, 0, 0] : memref<2x24x32xf32, 3> -> !xegpu.tensor_desc<16xf32, #xegpu.block_tdesc_attr> + gpu.return +} + // CHECK: gpu.func @test_prefetch_nd_vc(%[[arg0:.*]]: memref<24x32xf16>) { gpu.func @test_prefetch_nd_vc(%src: memref<24x32xf16>) { // CHECK: %[[R0:.*]] = xegpu.create_nd_tdesc %[[arg0]][0, 0] : memref<24x32xf16> -> !xegpu.tensor_desc<8x16xf16> @@ -97,17 +104,24 @@ gpu.func @test_create_update_nd_tdesc_vc(%src: memref<24x32xf32>) { // CHECK: gpu.func @test_create_tdesc_vc(%[[arg0:.*]]: ui64) { gpu.func @test_create_tdesc_vc(%src: ui64) { - //CHECK: %[[R0:.*]] = xegpu.create_tdesc %arg0 [0, 8, 16, 24] {chunk_size = 2 : i64} : ui64 -> !xegpu.tensor_desc<4x2xf32, #xegpu.tdesc_attr> - %1 = xegpu.create_tdesc %src[0, 8, 16, 24] {chunk_size = 2} : ui64 -> !xegpu.tensor_desc<4x2xf32, #xegpu.tdesc_attr> + //CHECK: %[[R0:.*]] = xegpu.create_tdesc %arg0 [0, 8, 16, 24] : ui64 -> !xegpu.tensor_desc<4x2xf32, #xegpu.scatter_tdesc_attr> + %1 = xegpu.create_tdesc %src[0, 8, 16, 24] : ui64 -> !xegpu.tensor_desc<4x2xf32, #xegpu.scatter_tdesc_attr> + gpu.return +} + +// CHECK: gpu.func @test_create_tdesc_vc_1(%[[arg0:.*]]: memref) { +gpu.func @test_create_tdesc_vc_1(%src: memref) { + //CHECK: %[[R0:.*]] = xegpu.create_tdesc %arg0 [0, 8, 16, 24] : memref -> !xegpu.tensor_desc<4x2xf32, #xegpu.scatter_tdesc_attr> + %1 = xegpu.create_tdesc %src[0, 8, 16, 24] : memref -> !xegpu.tensor_desc<4x2xf32, #xegpu.scatter_tdesc_attr> gpu.return } // CHECK: gpu.func @test_prefetch_vc(%[[arg0:.*]]: ui64) { gpu.func @test_prefetch_vc(%src: ui64) { - //CHECK: %[[R0:.*]] = xegpu.create_tdesc %arg0 [0, 8, 16, 24] {chunk_size = 2 : i64} : ui64 -> !xegpu.tensor_desc<4x2xf32, #xegpu.tdesc_attr> - %1 = xegpu.create_tdesc %src[0, 8, 16, 24] {chunk_size = 2} : ui64 -> !xegpu.tensor_desc<4x2xf32, #xegpu.tdesc_attr> - // CHECK: xegpu.prefetch %[[R0]] <{l1_hint = #xegpu.cache_hint, l2_hint = #xegpu.cache_hint}> : !xegpu.tensor_desc<4x2xf32, #xegpu.tdesc_attr> - xegpu.prefetch %1 <{l1_hint = #xegpu.cache_hint, l2_hint = #xegpu.cache_hint}>: !xegpu.tensor_desc<4x2xf32, #xegpu.tdesc_attr> + //CHECK: %[[R0:.*]] = xegpu.create_tdesc %arg0 [0, 8, 16, 24] : ui64 -> !xegpu.tensor_desc<4x2xf32, #xegpu.scatter_tdesc_attr> + %1 = xegpu.create_tdesc %src[0, 8, 16, 24] : ui64 -> !xegpu.tensor_desc<4x2xf32, #xegpu.scatter_tdesc_attr> + // CHECK: xegpu.prefetch %[[R0]] <{l1_hint = #xegpu.cache_hint, l2_hint = #xegpu.cache_hint}> : !xegpu.tensor_desc<4x2xf32, #xegpu.scatter_tdesc_attr> + xegpu.prefetch %1 <{l1_hint = #xegpu.cache_hint, l2_hint = #xegpu.cache_hint}>: !xegpu.tensor_desc<4x2xf32, #xegpu.scatter_tdesc_attr> gpu.return } @@ -115,12 +129,12 @@ gpu.func @test_prefetch_vc(%src: ui64) { gpu.func @test_load_gather_vc(%src: ui64) { //CHECK: %[[cst:.*]] = arith.constant dense : vector<4xi1> %0 = arith.constant dense<1>: vector<4xi1> - //CHECK: %[[R0:.*]] = xegpu.create_tdesc %arg0 [0, 8, 16, 24] {chunk_size = 2 : i64} : ui64 -> !xegpu.tensor_desc<4x2xf32, 
#xegpu.tdesc_attr> - %1 = xegpu.create_tdesc %src[0, 8, 16, 24] {chunk_size = 2} : ui64 -> !xegpu.tensor_desc<4x2xf32, #xegpu.tdesc_attr> - //CHECK: %[[R1:.*]] = xegpu.load %[[R0]], %[[cst]] <{l1_hint = #xegpu.cache_hint, l2_hint = #xegpu.cache_hint}> - //CHECK-SAME: !xegpu.tensor_desc<4x2xf32, #xegpu.tdesc_attr>, vector<4xi1> -> vector<4x2xf32> - %2 = xegpu.load %1, %0 <{l1_hint = #xegpu.cache_hint, l2_hint = #xegpu.cache_hint}> - : !xegpu.tensor_desc<4x2xf32, #xegpu.tdesc_attr>, vector<4xi1> -> vector<4x2xf32> + //CHECK: %[[R0:.*]] = xegpu.create_tdesc %arg0 [0, 8, 16, 24] : ui64 -> !xegpu.tensor_desc<4x2xf32, #xegpu.scatter_tdesc_attr> + %1 = xegpu.create_tdesc %src[0, 8, 16, 24] : ui64 -> !xegpu.tensor_desc<4x2xf32, #xegpu.scatter_tdesc_attr> + //CHECK: %[[R1:.*]] = xegpu.load %[[R0]], %[[cst]] <{l1_hint = #xegpu.cache_hint, l2_hint = #xegpu.cache_hint, transpose}> + //CHECK-SAME: !xegpu.tensor_desc<4x2xf32, #xegpu.scatter_tdesc_attr>, vector<4xi1> -> vector<2x4xf32> + %2 = xegpu.load %1, %0 <{l1_hint = #xegpu.cache_hint, l2_hint = #xegpu.cache_hint, transpose}> + : !xegpu.tensor_desc<4x2xf32, #xegpu.scatter_tdesc_attr>, vector<4xi1> -> vector<2x4xf32> gpu.return } @@ -128,23 +142,23 @@ gpu.func @test_load_gather_vc(%src: ui64) { gpu.func @test_store_scatter_vc(%src: ui64) { //CHECK: %[[c0:.*]] = arith.constant dense : vector<4xi1> %0 = arith.constant dense<1>: vector<4xi1> - //CHECK: %[[c1:.*]] = arith.constant dense<2.900000e+00> : vector<4x2xf32> - %1 = arith.constant dense<2.9>: vector<4x2xf32> - //CHECK: %[[R0:.*]] = xegpu.create_tdesc %arg0 [0, 8, 16, 24] {chunk_size = 2 : i64} : ui64 -> !xegpu.tensor_desc<4x2xf32, #xegpu.tdesc_attr> - %2 = xegpu.create_tdesc %src[0, 8, 16, 24] {chunk_size = 2} : ui64 -> !xegpu.tensor_desc<4x2xf32, #xegpu.tdesc_attr> - //CHECK: xegpu.store %[[c1]], %[[R0]], %[[c0]] <{l1_hint = #xegpu.cache_hint, l2_hint = #xegpu.cache_hint}> - //CHECK-SAME: vector<4x2xf32>, !xegpu.tensor_desc<4x2xf32, #xegpu.tdesc_attr>, vector<4xi1> - xegpu.store %1, %2, %0 <{l1_hint = #xegpu.cache_hint, l2_hint = #xegpu.cache_hint}> - : vector<4x2xf32>, !xegpu.tensor_desc<4x2xf32, #xegpu.tdesc_attr>, vector<4xi1> + //CHECK: %[[c1:.*]] = arith.constant dense<2.900000e+00> : vector<2x4xf32> + %1 = arith.constant dense<2.9>: vector<2x4xf32> + //CHECK: %[[R0:.*]] = xegpu.create_tdesc %arg0 [0, 8, 16, 24] : ui64 -> !xegpu.tensor_desc<4x2xf32, #xegpu.scatter_tdesc_attr> + %2 = xegpu.create_tdesc %src[0, 8, 16, 24] : ui64 -> !xegpu.tensor_desc<4x2xf32, #xegpu.scatter_tdesc_attr> + //CHECK: xegpu.store %[[c1]], %[[R0]], %[[c0]] <{l1_hint = #xegpu.cache_hint, l2_hint = #xegpu.cache_hint, transpose}> + //CHECK-SAME: vector<2x4xf32>, !xegpu.tensor_desc<4x2xf32, #xegpu.scatter_tdesc_attr>, vector<4xi1> + xegpu.store %1, %2, %0 <{l1_hint = #xegpu.cache_hint, l2_hint = #xegpu.cache_hint, transpose}> + : vector<2x4xf32>, !xegpu.tensor_desc<4x2xf32, #xegpu.scatter_tdesc_attr>, vector<4xi1> gpu.return } // CHECK: gpu.func @test_create_update_tdesc_vc(%[[arg0:.*]]: ui64) { gpu.func @test_create_update_tdesc_vc(%src: ui64) { - //CHECK: %[[R0:.*]] = xegpu.create_tdesc %arg0 [0, 8, 16, 24] {chunk_size = 2 : i64} : ui64 -> !xegpu.tensor_desc<4x2xf32, #xegpu.tdesc_attr> - %1 = xegpu.create_tdesc %src[0, 8, 16, 24] {chunk_size = 2} : ui64 -> !xegpu.tensor_desc<4x2xf32, #xegpu.tdesc_attr> - //CHECK: %[[R1:.*]] = xegpu.update_offset %[[R0]], [32, 32, 32, 32] : !xegpu.tensor_desc<4x2xf32, #xegpu.tdesc_attr> - %2 = xegpu.update_offset %1, [32, 32, 32, 32] : !xegpu.tensor_desc<4x2xf32, #xegpu.tdesc_attr> + 
//CHECK: %[[R0:.*]] = xegpu.create_tdesc %arg0 [0, 8, 16, 24] : ui64 -> !xegpu.tensor_desc<4x2xf32, #xegpu.scatter_tdesc_attr> + %1 = xegpu.create_tdesc %src[0, 8, 16, 24]: ui64 -> !xegpu.tensor_desc<4x2xf32, #xegpu.scatter_tdesc_attr> + //CHECK: %[[R1:.*]] = xegpu.update_offset %[[R0]], [32, 32, 32, 32] : !xegpu.tensor_desc<4x2xf32, #xegpu.scatter_tdesc_attr> + %2 = xegpu.update_offset %1, [32, 32, 32, 32] : !xegpu.tensor_desc<4x2xf32, #xegpu.scatter_tdesc_attr> gpu.return } @@ -165,10 +179,10 @@ gpu.func @test_dpas_vc_with_packed_b(%a : vector<8x16xf16>, %b: vector<8x16x2xf1 // CHECK: gpu.func @test_atomic_rmw(%[[arg0:.*]]: ui64, %[[arg1:.*]]: vector<16xf32>, %[[arg2:.*]]: vector<16xi1>) gpu.func @test_atomic_rmw(%src: ui64, %value : vector<16xf32>, %mask : vector<16xi1>) { - //CHECK: %[[R0:.*]] = xegpu.create_tdesc %[[arg0]] [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15] : ui64 -> !xegpu.tensor_desc<16xf32, #xegpu.tdesc_attr> - %1 = xegpu.create_tdesc %src[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]: ui64 -> !xegpu.tensor_desc<16xf32, #xegpu.tdesc_attr> - //CHECK: %[[R1:.*]] = xegpu.atomic_rmw addf %[[R0]], %[[arg2]], %[[arg1]] : !xegpu.tensor_desc<16xf32, #xegpu.tdesc_attr>, vector<16xi1>, vector<16xf32> -> vector<16xf32> - xegpu.atomic_rmw addf %1, %mask, %value: !xegpu.tensor_desc<16xf32, #xegpu.tdesc_attr>, vector<16xi1>, vector<16xf32> -> vector<16xf32> + //CHECK: %[[R0:.*]] = xegpu.create_tdesc %[[arg0]] [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15] : ui64 -> !xegpu.tensor_desc<16xf32, #xegpu.scatter_tdesc_attr<>> + %1 = xegpu.create_tdesc %src[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]: ui64 -> !xegpu.tensor_desc<16xf32, #xegpu.scatter_tdesc_attr<>> + //CHECK: %[[R1:.*]] = xegpu.atomic_rmw addf %[[R0]], %[[arg2]], %[[arg1]] : !xegpu.tensor_desc<16xf32, #xegpu.scatter_tdesc_attr<>>, vector<16xi1>, vector<16xf32> -> vector<16xf32> + xegpu.atomic_rmw addf %1, %mask, %value: !xegpu.tensor_desc<16xf32, #xegpu.scatter_tdesc_attr<>>, vector<16xi1>, vector<16xf32> -> vector<16xf32> gpu.return } diff --git a/mlir/test/Dialect/XeGPU/invalid.mlir b/mlir/test/Dialect/XeGPU/invalid.mlir index 7ef50bb2b5fad..193dae352e370 100644 --- a/mlir/test/Dialect/XeGPU/invalid.mlir +++ b/mlir/test/Dialect/XeGPU/invalid.mlir @@ -15,6 +15,20 @@ func.func @test_create_nd_tdesc_vc_2(%src: memref<24x32xf32>) { return } +// ----- +func.func @test_create_nd_tdesc_vc_3(%src: memref<2x24x32xf32, 3>) { + // expected-error@+1 {{SLM is not supported for 2D Block TensorDesc}} + %1 = xegpu.create_nd_tdesc %src[0, 0, 0] : memref<2x24x32xf32, 3> -> !xegpu.tensor_desc<8x16xf32, #xegpu.block_tdesc_attr> + return +} + +// ----- +func.func @test_create_nd_tdesc_vc_4(%src: memref<2x24x32xf32, 3>) { + // expected-error@+1 {{Memory space mismatch}} + %1 = xegpu.create_nd_tdesc %src[0, 0, 0] : memref<2x24x32xf32, 3> -> !xegpu.tensor_desc<16xf32> + return +} + // ----- func.func @test_prefetch_nd_vc_1(%src: memref<24x32xf16>) { %1 = xegpu.create_nd_tdesc %src[0, 0] : memref<24x32xf16> -> !xegpu.tensor_desc<8x16xf16> @@ -26,10 +40,10 @@ func.func @test_prefetch_nd_vc_1(%src: memref<24x32xf16>) { // ----- func.func @test_prefetch_nd_vc_2(%src: memref<24xf16>) { %1 = xegpu.create_tdesc %src[0, 1, 2, 3, 4, 5, 6, 7] - : memref<24xf16> -> !xegpu.tensor_desc<8xf16, #xegpu.tdesc_attr> + : memref<24xf16> -> !xegpu.tensor_desc<8xf16, #xegpu.scatter_tdesc_attr<>> // expected-error@+1 {{Expects a non-scattered TensorDesc}} xegpu.prefetch_nd %1 <{l1_hint = #xegpu.cache_hint}> - : 
!xegpu.tensor_desc<8xf16, #xegpu.tdesc_attr> + : !xegpu.tensor_desc<8xf16, #xegpu.scatter_tdesc_attr<>> return } @@ -44,11 +58,11 @@ func.func @test_load_nd_vc_1(%src: memref<8x16xf16>) { // ----- func.func @test_load_nd_vc_2(%src: memref<16xf16>) { - %1 = xegpu.create_tdesc %src[0, 2, 4, 6, 8, 10, 12, 14] {chunk_size = 2} - : memref<16xf16> -> !xegpu.tensor_desc<8x2xf16, #xegpu.tdesc_attr> + %1 = xegpu.create_tdesc %src[0, 2, 4, 6, 8, 10, 12, 14] + : memref<16xf16> -> !xegpu.tensor_desc<8x2xf16, #xegpu.scatter_tdesc_attr> // expected-error@+1 {{Expects a non-scattered TensorDesc.}} %2 = xegpu.load_nd %1 <{l1_hint = #xegpu.cache_hint}> - : !xegpu.tensor_desc<8x2xf16, #xegpu.tdesc_attr> -> vector<8x2xf16> + : !xegpu.tensor_desc<8x2xf16, #xegpu.scatter_tdesc_attr> -> vector<8x2xf16> return } @@ -73,28 +87,28 @@ func.func @test_store_nd_vc_1(%dst: memref<24x32xf16>) { // ----- func.func @test_store_nd_vc_2(%dst: memref<16xf16>) { %1 = arith.constant dense<1.0>: vector<8x2xf16> - %2 = xegpu.create_tdesc %dst[0, 2, 4, 6, 8, 10, 12, 14] {chunk_size = 2} - : memref<16xf16> -> !xegpu.tensor_desc<8x2xf16, #xegpu.tdesc_attr> + %2 = xegpu.create_tdesc %dst[0, 2, 4, 6, 8, 10, 12, 14] + : memref<16xf16> -> !xegpu.tensor_desc<8x2xf16, #xegpu.scatter_tdesc_attr> // expected-error@+1 {{Expects a non-scattered TensorDesc}} xegpu.store_nd %1, %2 <{l1_hint = #xegpu.cache_hint}> - : vector<8x2xf16>, !xegpu.tensor_desc<8x2xf16, #xegpu.tdesc_attr> + : vector<8x2xf16>, !xegpu.tensor_desc<8x2xf16, #xegpu.scatter_tdesc_attr> return } // ----- func.func @test_update_nd_offset_1(%dst: memref<16xf16>) { - %1 = xegpu.create_tdesc %dst[0, 2, 4, 6, 8, 10, 12, 14] {chunk_size = 2} - : memref<16xf16> -> !xegpu.tensor_desc<8x2xf16, #xegpu.tdesc_attr> + %1 = xegpu.create_tdesc %dst[0, 2, 4, 6, 8, 10, 12, 14] + : memref<16xf16> -> !xegpu.tensor_desc<8x2xf16, #xegpu.scatter_tdesc_attr> // expected-error@+1 {{Expects a non-scattered TensorDesc}} - xegpu.update_nd_offset %1, [0, 2] : !xegpu.tensor_desc<8x2xf16, #xegpu.tdesc_attr> + xegpu.update_nd_offset %1, [0, 2] : !xegpu.tensor_desc<8x2xf16, #xegpu.scatter_tdesc_attr> return } // ----- func.func @test_create_tdesc_vc_1(%src: ui64) { // expected-error@+1 {{Expects a scattered TensorDesc}} - %1 = xegpu.create_tdesc %src[0, 2, 4, 6, 8, 10, 12, 14] {chunk_size = 2} - : ui64 -> !xegpu.tensor_desc<8x2xf16> + %1 = xegpu.create_tdesc %src[0, 2, 4, 6, 8, 10, 12, 14] + : ui64 -> !xegpu.tensor_desc<8xf16> return } @@ -102,7 +116,14 @@ func.func @test_create_tdesc_vc_1(%src: ui64) { func.func @test_create_tdesc_vc_2(%src: ui64) { // expected-error@+1 {{Incorrect TensorDesc shape}} %1 = xegpu.create_tdesc %src[0, 2, 4, 6, 8, 10, 12, 14] {chunk_size = 2} - : ui64 -> !xegpu.tensor_desc<8x4xf16, #xegpu.tdesc_attr> + : ui64 -> !xegpu.tensor_desc<8x4xf16, #xegpu.scatter_tdesc_attr<>> + return +} + +// ----- +func.func @test_create_tdesc_vc_1(%src: memref) { + // expected-error@+1 {{Memory space mismatch}} + %1 = xegpu.create_tdesc %src[0, 8, 16, 24] : memref -> !xegpu.tensor_desc<4x2xf32, #xegpu.scatter_tdesc_attr> return } @@ -116,9 +137,9 @@ func.func @test_prefetch_vc_1(%src: memref<24x32xf16>) { // ----- func.func @test_prefetch_vc_2(%src: ui64) { - %1 = xegpu.create_tdesc %src[0, 8, 16, 24] {chunk_size = 2} : ui64 -> !xegpu.tensor_desc<4x2xf32, #xegpu.tdesc_attr> + %1 = xegpu.create_tdesc %src[0, 8, 16, 24] : ui64 -> !xegpu.tensor_desc<4x2xf32, #xegpu.scatter_tdesc_attr> // expected-error@+1 {{invlid l1_hint: #xegpu.cache_hint}} - xegpu.prefetch %1 <{l1_hint = #xegpu.cache_hint}>: 
!xegpu.tensor_desc<4x2xf32, #xegpu.tdesc_attr> + xegpu.prefetch %1 <{l1_hint = #xegpu.cache_hint}>: !xegpu.tensor_desc<4x2xf32, #xegpu.scatter_tdesc_attr> return } @@ -135,11 +156,11 @@ func.func @test_load_gather_vc_1(%src: memref<24x32xf16>) { // ----- func.func @test_load_gather_vc_2(%src: ui64) { %0 = arith.constant dense<1>: vector<4xi1> - %1 = xegpu.create_tdesc %src[0, 8, 16, 24] {chunk_size = 2} : ui64 - -> !xegpu.tensor_desc<4x2xf32, #xegpu.tdesc_attr> + %1 = xegpu.create_tdesc %src[0, 8, 16, 24] : ui64 + -> !xegpu.tensor_desc<4x2xf32, #xegpu.scatter_tdesc_attr> // expected-error@+1 {{invlid l1_hint: #xegpu.cache_hint}} %2 = xegpu.load %1, %0 <{l1_hint = #xegpu.cache_hint}> - : !xegpu.tensor_desc<4x2xf32, #xegpu.tdesc_attr>, vector<4xi1> + : !xegpu.tensor_desc<4x2xf32, #xegpu.scatter_tdesc_attr>, vector<4xi1> -> vector<4x2xf32> return } @@ -159,11 +180,11 @@ func.func @test_store_scatter_vc_1(%src: memref<24x32xf32>) { func.func @test_store_scatter_vc_2(%src: ui64) { %0 = arith.constant dense<1>: vector<4xi1> %1 = arith.constant dense<2.9>: vector<4x2xf32> - %2 = xegpu.create_tdesc %src[0, 8, 16, 24] {chunk_size = 2} - : ui64 -> !xegpu.tensor_desc<4x2xf32, #xegpu.tdesc_attr> + %2 = xegpu.create_tdesc %src[0, 8, 16, 24] + : ui64 -> !xegpu.tensor_desc<4x2xf32, #xegpu.scatter_tdesc_attr> // expected-error@+1 {{invlid l1_hint: #xegpu.cache_hint}} xegpu.store %1, %2, %0 <{l1_hint = #xegpu.cache_hint}> : vector<4x2xf32>, - !xegpu.tensor_desc<4x2xf32, #xegpu.tdesc_attr>, vector<4xi1> + !xegpu.tensor_desc<4x2xf32, #xegpu.scatter_tdesc_attr>, vector<4xi1> return } @@ -182,9 +203,9 @@ func.func @test_dpas_vc_2(%a : vector<8x8x2xf16>, %b: vector<8x16x2xf16>) { } // ----- -func.func @test_atomic_rmw(%src: ui64, %value : vector<16x8xf32>, %mask : vector<16xi1>) { - %1 = xegpu.create_tdesc %src[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15] {chunk_size = 8}: ui64 -> !xegpu.tensor_desc<16x8xf32, #xegpu.tdesc_attr> - // expected-error@+1 {{failed to verify that all of {tensorDesc, mask, value, result} have same shape}} - xegpu.atomic_rmw addf %1, %mask, %value: !xegpu.tensor_desc<16x8xf32, #xegpu.tdesc_attr>, vector<16xi1>, vector<16x8xf32> -> vector<16x8xf32> - gpu.return +func.func @test_atomic_rmw(%src: ui64, %value : vector<16x4xf32>, %mask : vector<16xi1>) { + %1 = xegpu.create_tdesc %src[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15] : ui64 -> !xegpu.tensor_desc<16x8xf32, #xegpu.scatter_tdesc_attr> + // expected-error@+1 {{failed to verify that all of {tensorDesc, value, result} have same shape}} + xegpu.atomic_rmw addf %1, %mask, %value: !xegpu.tensor_desc<16x8xf32, #xegpu.scatter_tdesc_attr>, vector<16xi1>, vector<16x4xf32> -> vector<16x8xf32> + return } \ No newline at end of file From 36757613b73908f055674a8df0b51cc00aa04373 Mon Sep 17 00:00:00 2001 From: Alex MacLean Date: Tue, 24 Sep 2024 08:15:14 -0700 Subject: [PATCH 004/212] [NVVM] Upgrade nvvm.ptr.* intrinics to addrspace cast (#109710) Remove the following intrinsics which can be trivially replaced with an `addrspacecast` * llvm.nvvm.ptr.gen.to.global * llvm.nvvm.ptr.gen.to.shared * llvm.nvvm.ptr.gen.to.constant * llvm.nvvm.ptr.gen.to.local * llvm.nvvm.ptr.global.to.gen * llvm.nvvm.ptr.shared.to.gen * llvm.nvvm.ptr.constant.to.gen * llvm.nvvm.ptr.local.to.gen Also, cleanup the NVPTX lowering of `addrspacecast` making it more concise. 
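For illustration (this mirrors the `AutoUpgrade.cpp` hunk below; the helper name here is invented for the sketch), the entire upgrade reduces each removed intrinsic call to a single `addrspacecast` built from the call's own operand and result types:

```cpp
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstrTypes.h"

using namespace llvm;

// Sketch of the upgrade rule: a call such as
//   %g = call ptr addrspace(1) @llvm.nvvm.ptr.gen.to.global.p1.p0(ptr %p)
// is rewritten to
//   %g = addrspacecast ptr %p to ptr addrspace(1)
static Value *upgradeNVVMPtrCast(CallBase *CI, IRBuilder<> &Builder) {
  return Builder.CreateAddrSpaceCast(CI->getArgOperand(0), CI->getType());
}
```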
--- llvm/docs/NVPTXUsage.rst | 63 ------------- llvm/docs/ReleaseNotes.rst | 12 +++ llvm/include/llvm/IR/IntrinsicsNVVM.td | 50 +++------- llvm/lib/IR/AutoUpgrade.cpp | 19 ++++ llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp | 58 ++++++------ llvm/lib/Target/NVPTX/NVPTXInstrInfo.td | 4 - llvm/lib/Target/NVPTX/NVPTXIntrinsics.td | 92 ++++++------------- .../Assembler/auto_upgrade_nvvm_intrinsics.ll | 35 +++++++ llvm/test/CodeGen/NVPTX/intrin-nocapture.ll | 21 ----- llvm/test/DebugInfo/NVPTX/debug-info.ll | 20 ++-- 10 files changed, 146 insertions(+), 228 deletions(-) delete mode 100644 llvm/test/CodeGen/NVPTX/intrin-nocapture.ll diff --git a/llvm/docs/NVPTXUsage.rst b/llvm/docs/NVPTXUsage.rst index 3a566bbac3623..8b0b05c0ea424 100644 --- a/llvm/docs/NVPTXUsage.rst +++ b/llvm/docs/NVPTXUsage.rst @@ -127,69 +127,6 @@ Example: 64-bit PTX for CUDA Driver API: ``nvptx64-nvidia-cuda`` NVPTX Intrinsics ================ -Address Space Conversion ------------------------- - -'``llvm.nvvm.ptr.*.to.gen``' Intrinsics -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -Syntax: -""""""" - -These are overloaded intrinsics. You can use these on any pointer types. - -.. code-block:: llvm - - declare ptr @llvm.nvvm.ptr.global.to.gen.p0.p1(ptr addrspace(1)) - declare ptr @llvm.nvvm.ptr.shared.to.gen.p0.p3(ptr addrspace(3)) - declare ptr @llvm.nvvm.ptr.constant.to.gen.p0.p4(ptr addrspace(4)) - declare ptr @llvm.nvvm.ptr.local.to.gen.p0.p5(ptr addrspace(5)) - -Overview: -""""""""" - -The '``llvm.nvvm.ptr.*.to.gen``' intrinsics convert a pointer in a non-generic -address space to a generic address space pointer. - -Semantics: -"""""""""" - -These intrinsics modify the pointer value to be a valid generic address space -pointer. - - -'``llvm.nvvm.ptr.gen.to.*``' Intrinsics -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -Syntax: -""""""" - -These are overloaded intrinsics. You can use these on any pointer types. - -.. code-block:: llvm - - declare ptr addrspace(1) @llvm.nvvm.ptr.gen.to.global.p1.p0(ptr) - declare ptr addrspace(3) @llvm.nvvm.ptr.gen.to.shared.p3.p0(ptr) - declare ptr addrspace(4) @llvm.nvvm.ptr.gen.to.constant.p4.p0(ptr) - declare ptr addrspace(5) @llvm.nvvm.ptr.gen.to.local.p5.p0(ptr) - -Overview: -""""""""" - -The '``llvm.nvvm.ptr.gen.to.*``' intrinsics convert a pointer in the generic -address space to a pointer in the target address space. Note that these -intrinsics are only useful if the address space of the target address space of -the pointer is known. It is not legal to use address space conversion -intrinsics to convert a pointer from one non-generic address space to another -non-generic address space. - -Semantics: -"""""""""" - -These intrinsics modify the pointer value to be a valid pointer in the target -non-generic address space. 
- - Reading PTX Special Registers ----------------------------- diff --git a/llvm/docs/ReleaseNotes.rst b/llvm/docs/ReleaseNotes.rst index 27d6bc158b3c3..c85ea28ad9f8c 100644 --- a/llvm/docs/ReleaseNotes.rst +++ b/llvm/docs/ReleaseNotes.rst @@ -69,6 +69,18 @@ Changes to the LLVM IR * ``llvm.nvvm.rotate.right.b64`` * ``llvm.nvvm.rotate.b64`` +* Remove the following intrinsics which can be replaced with an + ``addrspacecast``: + + * ``llvm.nvvm.ptr.gen.to.global`` + * ``llvm.nvvm.ptr.gen.to.shared`` + * ``llvm.nvvm.ptr.gen.to.constant`` + * ``llvm.nvvm.ptr.gen.to.local`` + * ``llvm.nvvm.ptr.global.to.gen`` + * ``llvm.nvvm.ptr.shared.to.gen`` + * ``llvm.nvvm.ptr.constant.to.gen`` + * ``llvm.nvvm.ptr.local.to.gen`` + Changes to LLVM infrastructure ------------------------------ diff --git a/llvm/include/llvm/IR/IntrinsicsNVVM.td b/llvm/include/llvm/IR/IntrinsicsNVVM.td index aa5294f5f9c90..7b8ffe417fccd 100644 --- a/llvm/include/llvm/IR/IntrinsicsNVVM.td +++ b/llvm/include/llvm/IR/IntrinsicsNVVM.td @@ -30,10 +30,18 @@ // * llvm.nvvm.max.ui --> select(x ule y, x, y) // * llvm.nvvm.max.ull --> ibid. // * llvm.nvvm.h2f --> llvm.convert.to.fp16.f32 -// * llvm.nvvm.bitcast.f2i --> bitcast -// * llvm.nvvm.bitcast.i2f --> ibid. -// * llvm.nvvm.bitcast.d2ll --> ibid. -// * llvm.nvvm.bitcast.ll2d --> ibid. +// * llvm.nvvm.bitcast.f2i --> bitcast +// * llvm.nvvm.bitcast.i2f --> ibid. +// * llvm.nvvm.bitcast.d2ll --> ibid. +// * llvm.nvvm.bitcast.ll2d --> ibid. +// * llvm.nvvm.ptr.gen.to.global --> addrspacecast +// * llvm.nvvm.ptr.gen.to.shared --> ibid. +// * llvm.nvvm.ptr.gen.to.constant --> ibid. +// * llvm.nvvm.ptr.gen.to.local --> ibid. +// * llvm.nvvm.ptr.global.to.gen --> ibid. +// * llvm.nvvm.ptr.shared.to.gen --> ibid. +// * llvm.nvvm.ptr.constant.to.gen --> ibid. +// * llvm.nvvm.ptr.local.to.gen --> ibid. def llvm_global_ptr_ty : LLVMQualPointerType<1>; // (global)ptr def llvm_shared_ptr_ty : LLVMQualPointerType<3>; // (shared)ptr @@ -1602,40 +1610,6 @@ def int_nvvm_ldg_global_p : Intrinsic<[llvm_anyptr_ty], [IntrReadMem, IntrArgMemOnly, IntrNoCallback, IntrWillReturn, NoCapture>], "llvm.nvvm.ldg.global.p">; -// Use for generic pointers -// - These intrinsics are used to convert address spaces. -// - The input pointer and output pointer must have the same type, except for -// the address-space. (This restriction is not enforced here as there is -// currently no way to describe it). -// - This complements the llvm bitcast, which can be used to cast one type -// of pointer to another type of pointer, while the address space remains -// the same. 
-def int_nvvm_ptr_local_to_gen: DefaultAttrsIntrinsic<[llvm_anyptr_ty], - [llvm_anyptr_ty], [IntrNoMem, IntrSpeculatable], - "llvm.nvvm.ptr.local.to.gen">; -def int_nvvm_ptr_shared_to_gen: DefaultAttrsIntrinsic<[llvm_anyptr_ty], - [llvm_anyptr_ty], [IntrNoMem, IntrSpeculatable], - "llvm.nvvm.ptr.shared.to.gen">; -def int_nvvm_ptr_global_to_gen: DefaultAttrsIntrinsic<[llvm_anyptr_ty], - [llvm_anyptr_ty], [IntrNoMem, IntrSpeculatable], - "llvm.nvvm.ptr.global.to.gen">; -def int_nvvm_ptr_constant_to_gen: DefaultAttrsIntrinsic<[llvm_anyptr_ty], - [llvm_anyptr_ty], [IntrNoMem, IntrSpeculatable], - "llvm.nvvm.ptr.constant.to.gen">; - -def int_nvvm_ptr_gen_to_global: DefaultAttrsIntrinsic<[llvm_anyptr_ty], - [llvm_anyptr_ty], [IntrNoMem, IntrSpeculatable], - "llvm.nvvm.ptr.gen.to.global">; -def int_nvvm_ptr_gen_to_shared: DefaultAttrsIntrinsic<[llvm_anyptr_ty], - [llvm_anyptr_ty], [IntrNoMem, IntrSpeculatable], - "llvm.nvvm.ptr.gen.to.shared">; -def int_nvvm_ptr_gen_to_local: DefaultAttrsIntrinsic<[llvm_anyptr_ty], - [llvm_anyptr_ty], [IntrNoMem, IntrSpeculatable], - "llvm.nvvm.ptr.gen.to.local">; -def int_nvvm_ptr_gen_to_constant: DefaultAttrsIntrinsic<[llvm_anyptr_ty], - [llvm_anyptr_ty], [IntrNoMem, IntrSpeculatable], - "llvm.nvvm.ptr.gen.to.constant">; - // Used in nvvm internally to help address space opt and ptx code generation // This is for params that are passed to kernel functions by pointer by-val. def int_nvvm_ptr_gen_to_param: Intrinsic<[llvm_anyptr_ty], diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp index 3390d651d6c69..b84258398c193 100644 --- a/llvm/lib/IR/AutoUpgrade.cpp +++ b/llvm/lib/IR/AutoUpgrade.cpp @@ -1275,6 +1275,16 @@ static bool upgradeIntrinsicFunction1(Function *F, Function *&NewFn, else if (Name.consume_front("rotate.")) // nvvm.rotate.{b32,b64,right.b64} Expand = Name == "b32" || Name == "b64" || Name == "right.b64"; + else if (Name.consume_front("ptr.gen.to.")) + // nvvm.ptr.gen.to.{local,shared,global,constant} + Expand = Name.starts_with("local") || Name.starts_with("shared") || + Name.starts_with("global") || Name.starts_with("constant"); + else if (Name.consume_front("ptr.")) + // nvvm.ptr.{local,shared,global,constant}.to.gen + Expand = + (Name.consume_front("local") || Name.consume_front("shared") || + Name.consume_front("global") || Name.consume_front("constant")) && + Name.starts_with(".to.gen"); else Expand = false; @@ -2338,6 +2348,15 @@ static Value *upgradeNVVMIntrinsicCall(StringRef Name, CallBase *CI, Value *ZExtShiftAmt = Builder.CreateZExt(CI->getOperand(1), Int64Ty); Rep = Builder.CreateIntrinsic(Int64Ty, Intrinsic::fshr, {Arg, Arg, ZExtShiftAmt}); + } else if ((Name.consume_front("ptr.gen.to.") && + (Name.starts_with("local") || Name.starts_with("shared") || + Name.starts_with("global") || Name.starts_with("constant"))) || + (Name.consume_front("ptr.") && + (Name.consume_front("local") || Name.consume_front("shared") || + Name.consume_front("global") || + Name.consume_front("constant")) && + Name.starts_with(".to.gen"))) { + Rep = Builder.CreateAddrSpaceCast(CI->getArgOperand(0), CI->getType()); } else { Intrinsic::ID IID = shouldUpgradeNVPTXBF16Intrinsic(Name); if (IID != Intrinsic::not_intrinsic && diff --git a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp index 56c96ea943b89..7f942de74bdcc 100644 --- a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp @@ -1109,11 +1109,21 @@ void NVPTXDAGToDAGISel::SelectAddrSpaceCast(SDNode *N) { 
AddrSpaceCastSDNode *CastN = cast(N); unsigned SrcAddrSpace = CastN->getSrcAddressSpace(); unsigned DstAddrSpace = CastN->getDestAddressSpace(); + SDLoc DL(N); assert(SrcAddrSpace != DstAddrSpace && "addrspacecast must be between different address spaces"); if (DstAddrSpace == ADDRESS_SPACE_GENERIC) { // Specific to generic + + if (TM.is64Bit() && TM.getPointerSizeInBits(SrcAddrSpace) == 32) { + SDValue CvtNone = + CurDAG->getTargetConstant(NVPTX::PTXCvtMode::NONE, DL, MVT::i32); + SDNode *Cvt = CurDAG->getMachineNode(NVPTX::CVT_u64_u32, DL, MVT::i64, + Src, CvtNone); + Src = SDValue(Cvt, 0); + } + unsigned Opc; switch (SrcAddrSpace) { default: report_fatal_error("Bad address space in addrspacecast"); @@ -1121,26 +1131,16 @@ void NVPTXDAGToDAGISel::SelectAddrSpaceCast(SDNode *N) { Opc = TM.is64Bit() ? NVPTX::cvta_global_64 : NVPTX::cvta_global; break; case ADDRESS_SPACE_SHARED: - Opc = TM.is64Bit() ? (TM.getPointerSizeInBits(SrcAddrSpace) == 32 - ? NVPTX::cvta_shared_6432 - : NVPTX::cvta_shared_64) - : NVPTX::cvta_shared; + Opc = TM.is64Bit() ? NVPTX::cvta_shared_64 : NVPTX::cvta_shared; break; case ADDRESS_SPACE_CONST: - Opc = TM.is64Bit() ? (TM.getPointerSizeInBits(SrcAddrSpace) == 32 - ? NVPTX::cvta_const_6432 - : NVPTX::cvta_const_64) - : NVPTX::cvta_const; + Opc = TM.is64Bit() ? NVPTX::cvta_const_64 : NVPTX::cvta_const; break; case ADDRESS_SPACE_LOCAL: - Opc = TM.is64Bit() ? (TM.getPointerSizeInBits(SrcAddrSpace) == 32 - ? NVPTX::cvta_local_6432 - : NVPTX::cvta_local_64) - : NVPTX::cvta_local; + Opc = TM.is64Bit() ? NVPTX::cvta_local_64 : NVPTX::cvta_local; break; } - ReplaceNode(N, CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0), - Src)); + ReplaceNode(N, CurDAG->getMachineNode(Opc, DL, N->getValueType(0), Src)); return; } else { // Generic to specific @@ -1153,30 +1153,28 @@ void NVPTXDAGToDAGISel::SelectAddrSpaceCast(SDNode *N) { Opc = TM.is64Bit() ? NVPTX::cvta_to_global_64 : NVPTX::cvta_to_global; break; case ADDRESS_SPACE_SHARED: - Opc = TM.is64Bit() ? (TM.getPointerSizeInBits(DstAddrSpace) == 32 - ? NVPTX::cvta_to_shared_3264 - : NVPTX::cvta_to_shared_64) - : NVPTX::cvta_to_shared; + Opc = TM.is64Bit() ? NVPTX::cvta_to_shared_64 : NVPTX::cvta_to_shared; break; case ADDRESS_SPACE_CONST: - Opc = TM.is64Bit() ? (TM.getPointerSizeInBits(DstAddrSpace) == 32 - ? NVPTX::cvta_to_const_3264 - : NVPTX::cvta_to_const_64) - : NVPTX::cvta_to_const; + Opc = TM.is64Bit() ? NVPTX::cvta_to_const_64 : NVPTX::cvta_to_const; break; case ADDRESS_SPACE_LOCAL: - Opc = TM.is64Bit() ? (TM.getPointerSizeInBits(DstAddrSpace) == 32 - ? NVPTX::cvta_to_local_3264 - : NVPTX::cvta_to_local_64) - : NVPTX::cvta_to_local; + Opc = TM.is64Bit() ? NVPTX::cvta_to_local_64 : NVPTX::cvta_to_local; break; case ADDRESS_SPACE_PARAM: - Opc = TM.is64Bit() ? NVPTX::nvvm_ptr_gen_to_param_64 - : NVPTX::nvvm_ptr_gen_to_param; + Opc = TM.is64Bit() ? 
NVPTX::IMOV64rr : NVPTX::IMOV32rr; break; } - ReplaceNode(N, CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0), - Src)); + + SDNode *CVTA = CurDAG->getMachineNode(Opc, DL, N->getValueType(0), Src); + if (TM.is64Bit() && TM.getPointerSizeInBits(DstAddrSpace) == 32) { + SDValue CvtNone = + CurDAG->getTargetConstant(NVPTX::PTXCvtMode::NONE, DL, MVT::i32); + CVTA = CurDAG->getMachineNode(NVPTX::CVT_u32_u64, DL, MVT::i32, + SDValue(CVTA, 0), CvtNone); + } + + ReplaceNode(N, CVTA); return; } } diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td index f6bbf4c2ffc02..c3a8a774673f2 100644 --- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td +++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td @@ -174,10 +174,6 @@ def hasSM90a : Predicate<"Subtarget->getFullSmVersion() == 901">; def hasSHFL : Predicate<"!(Subtarget->getSmVersion() >= 70" "&& Subtarget->getPTXVersion() >= 64)">; -def useShortPtrLocal : Predicate<"TM.is64Bit() && TM.getPointerSizeInBits(ADDRESS_SPACE_LOCAL) == 32">; -def useShortPtrShared : Predicate<"TM.is64Bit() && TM.getPointerSizeInBits(ADDRESS_SPACE_SHARED) == 32">; -def useShortPtrConst : Predicate<"TM.is64Bit() && TM.getPointerSizeInBits(ADDRESS_SPACE_CONST) == 32">; - def useFP16Math: Predicate<"Subtarget->allowFP16Math()">; def hasBF16Math: Predicate<"Subtarget->hasBF16Math()">; diff --git a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td index 2688cfbe5e824..042b0965ea33f 100644 --- a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td +++ b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td @@ -2537,59 +2537,45 @@ defm INT_PTX_LDG_G_v4f32_ELE : VLDG_G_ELE_V4<"v4.f32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Float32Regs>; -multiclass NG_TO_G { +multiclass NG_TO_G { def "" : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src), - !strconcat("cvta.", Str, ".u32 \t$result, $src;"), - [(set Int32Regs:$result, (Intrin Int32Regs:$src))]>; + "cvta." # Str # ".u32 \t$result, $src;", []>; def _64 : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src), - !strconcat("cvta.", Str, ".u64 \t$result, $src;"), - [(set Int64Regs:$result, (Intrin Int64Regs:$src))]>; - def _6432 : NVPTXInst<(outs Int64Regs:$result), (ins Int32Regs:$src), - "{{ .reg .b64 %tmp;\n\t" - #" cvt.u64.u32 \t%tmp, $src;\n\t" - #" cvta." # Str # ".u64 \t$result, %tmp; }}", - [(set Int64Regs:$result, (Intrin Int32Regs:$src))]>, - Requires<[ShortPtr]>; + "cvta." # Str # ".u64 \t$result, $src;", []>; } -multiclass G_TO_NG { +multiclass G_TO_NG { def "" : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src), - !strconcat("cvta.to.", Str, ".u32 \t$result, $src;"), - [(set Int32Regs:$result, (Intrin Int32Regs:$src))]>; + "cvta.to." # Str # ".u32 \t$result, $src;", []>; def _64 : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src), - !strconcat("cvta.to.", Str, ".u64 \t$result, $src;"), - [(set Int64Regs:$result, (Intrin Int64Regs:$src))]>; - def _3264 : NVPTXInst<(outs Int32Regs:$result), (ins Int64Regs:$src), - "{{ .reg .b64 %tmp;\n\t" - #" cvta.to." 
# Str # ".u64 \t%tmp, $src;\n\t" - #" cvt.u32.u64 \t$result, %tmp; }}", - [(set Int32Regs:$result, (Intrin Int64Regs:$src))]>, - Requires<[ShortPtr]>; -} - -defm cvta_local : NG_TO_G<"local", int_nvvm_ptr_local_to_gen, useShortPtrLocal>; -defm cvta_shared : NG_TO_G<"shared", int_nvvm_ptr_shared_to_gen, useShortPtrShared>; -defm cvta_global : NG_TO_G<"global", int_nvvm_ptr_global_to_gen, False>; -defm cvta_const : NG_TO_G<"const", int_nvvm_ptr_constant_to_gen, useShortPtrConst>; -defm cvta_param : NG_TO_G<"param", int_nvvm_ptr_param_to_gen, False>; - -defm cvta_to_local : G_TO_NG<"local", int_nvvm_ptr_gen_to_local, useShortPtrLocal>; -defm cvta_to_shared : G_TO_NG<"shared", int_nvvm_ptr_gen_to_shared, useShortPtrShared>; -defm cvta_to_global : G_TO_NG<"global", int_nvvm_ptr_gen_to_global, False>; -defm cvta_to_const : G_TO_NG<"const", int_nvvm_ptr_gen_to_constant, useShortPtrConst>; + "cvta.to." # Str # ".u64 \t$result, $src;", []>; +} + +defm cvta_local : NG_TO_G<"local">; +defm cvta_shared : NG_TO_G<"shared">; +defm cvta_global : NG_TO_G<"global">; +defm cvta_const : NG_TO_G<"const">; + +defm cvta_to_local : G_TO_NG<"local">; +defm cvta_to_shared : G_TO_NG<"shared">; +defm cvta_to_global : G_TO_NG<"global">; +defm cvta_to_const : G_TO_NG<"const">; + +// nvvm.ptr.param.to.gen +defm cvta_param : NG_TO_G<"param">; + +def : Pat<(int_nvvm_ptr_param_to_gen Int32Regs:$src), + (cvta_param Int32Regs:$src)>; + +def : Pat<(int_nvvm_ptr_param_to_gen Int64Regs:$src), + (cvta_param_64 Int64Regs:$src)>; // nvvm.ptr.gen.to.param -def nvvm_ptr_gen_to_param : NVPTXInst<(outs Int32Regs:$result), - (ins Int32Regs:$src), - "mov.u32 \t$result, $src;", - [(set Int32Regs:$result, - (int_nvvm_ptr_gen_to_param Int32Regs:$src))]>; -def nvvm_ptr_gen_to_param_64 : NVPTXInst<(outs Int64Regs:$result), - (ins Int64Regs:$src), - "mov.u64 \t$result, $src;", - [(set Int64Regs:$result, - (int_nvvm_ptr_gen_to_param Int64Regs:$src))]>; +def : Pat<(int_nvvm_ptr_gen_to_param Int32Regs:$src), + (IMOV32rr Int32Regs:$src)>; +def : Pat<(int_nvvm_ptr_gen_to_param Int64Regs:$src), + (IMOV64rr Int64Regs:$src)>; // nvvm.move intrinsicc def nvvm_move_i16 : NVPTXInst<(outs Int16Regs:$r), (ins Int16Regs:$s), @@ -2632,24 +2618,6 @@ def nvvm_move_sym64 : NVPTXInst<(outs Int64Regs:$r), (ins imem:$s), [(set Int64Regs:$r, (int_nvvm_move_ptr texternalsym:$s))]>;*/ - -// MoveParam %r1, param -// ptr_local_to_gen %r2, %r1 -// ptr_gen_to_local %r3, %r2 -// -> -// mov %r1, param - -// @TODO: Revisit this. There is a type -// contradiction between iPTRAny and iPTR for the addr defs, so the move_sym -// instructions are not currently defined. However, we can use the ptr -// variants and the asm printer will do the right thing. 
-def : Pat<(i64 (int_nvvm_ptr_gen_to_local (int_nvvm_ptr_local_to_gen - (MoveParam texternalsym:$src)))), - (nvvm_move_ptr64 texternalsym:$src)>; -def : Pat<(i32 (int_nvvm_ptr_gen_to_local (int_nvvm_ptr_local_to_gen - (MoveParam texternalsym:$src)))), - (nvvm_move_ptr32 texternalsym:$src)>; - def texsurf_handles : NVPTXInst<(outs Int64Regs:$result), (ins imem:$src), "mov.u64 \t$result, $src;", []>; diff --git a/llvm/test/Assembler/auto_upgrade_nvvm_intrinsics.ll b/llvm/test/Assembler/auto_upgrade_nvvm_intrinsics.ll index 43ac246055da7..584c0ef7cfeb7 100644 --- a/llvm/test/Assembler/auto_upgrade_nvvm_intrinsics.ll +++ b/llvm/test/Assembler/auto_upgrade_nvvm_intrinsics.ll @@ -35,6 +35,15 @@ declare i32 @llvm.nvvm.rotate.b32(i32, i32) declare i64 @llvm.nvvm.rotate.right.b64(i64, i32) declare i64 @llvm.nvvm.rotate.b64(i64, i32) +declare ptr addrspace(1) @llvm.nvvm.ptr.gen.to.global.p1.p0(ptr) +declare ptr addrspace(3) @llvm.nvvm.ptr.gen.to.shared.p3.p0(ptr) +declare ptr addrspace(4) @llvm.nvvm.ptr.gen.to.constant.p4.p0(ptr) +declare ptr addrspace(5) @llvm.nvvm.ptr.gen.to.local.p5.p0(ptr) +declare ptr @llvm.nvvm.ptr.global.to.gen.p0.p1(ptr addrspace(1)) +declare ptr @llvm.nvvm.ptr.shared.to.gen.p0.p3(ptr addrspace(3)) +declare ptr @llvm.nvvm.ptr.constant.to.gen.p0.p4(ptr addrspace(4)) +declare ptr @llvm.nvvm.ptr.local.to.gen.p0.p5(ptr addrspace(5)) + ; CHECK-LABEL: @simple_upgrade define void @simple_upgrade(i32 %a, i64 %b, i16 %c) { ; CHECK: call i32 @llvm.bitreverse.i32(i32 %a) @@ -156,3 +165,29 @@ define void @rotate(i32 %a, i64 %b) { %r3 = call i64 @llvm.nvvm.rotate.b64(i64 %b, i32 8) ret void } + +; CHECK-LABEL: @addrspacecast +define void @addrspacecast(ptr %p0) { +; CHECK: %1 = addrspacecast ptr %p0 to ptr addrspace(1) +; CHECK: %2 = addrspacecast ptr addrspace(1) %1 to ptr +; CHECK: %3 = addrspacecast ptr %2 to ptr addrspace(3) +; CHECK: %4 = addrspacecast ptr addrspace(3) %3 to ptr +; CHECK: %5 = addrspacecast ptr %4 to ptr addrspace(4) +; CHECK: %6 = addrspacecast ptr addrspace(4) %5 to ptr +; CHECK: %7 = addrspacecast ptr %6 to ptr addrspace(5) +; CHECK: %8 = addrspacecast ptr addrspace(5) %7 to ptr +; + %p1 = call ptr addrspace(1) @llvm.nvvm.ptr.gen.to.global.p1.p0(ptr %p0) + %p2 = call ptr @llvm.nvvm.ptr.global.to.gen.p0.p1(ptr addrspace(1) %p1) + + %p3 = call ptr addrspace(3) @llvm.nvvm.ptr.gen.to.shared.p3.p0(ptr %p2) + %p4 = call ptr @llvm.nvvm.ptr.shared.to.gen.p0.p3(ptr addrspace(3) %p3) + + %p5 = call ptr addrspace(4) @llvm.nvvm.ptr.gen.to.constant.p4.p0(ptr %p4) + %p6 = call ptr @llvm.nvvm.ptr.constant.to.gen.p0.p4(ptr addrspace(4) %p5) + + %p7 = call ptr addrspace(5) @llvm.nvvm.ptr.gen.to.local.p5.p0(ptr %p6) + %p8 = call ptr @llvm.nvvm.ptr.local.to.gen.p0.p5(ptr addrspace(5) %p7) + + ret void +} diff --git a/llvm/test/CodeGen/NVPTX/intrin-nocapture.ll b/llvm/test/CodeGen/NVPTX/intrin-nocapture.ll deleted file mode 100644 index 040bbde13800c..0000000000000 --- a/llvm/test/CodeGen/NVPTX/intrin-nocapture.ll +++ /dev/null @@ -1,21 +0,0 @@ -; RUN: opt < %s -O3 -S | FileCheck %s - -; Address space intrinsics were erroneously marked NoCapture, leading to bad -; optimizations (such as the store below being eliminated as dead code). This -; test makes sure we don't regress. 
- -declare void @foo(ptr addrspace(1)) - -declare ptr addrspace(1) @llvm.nvvm.ptr.gen.to.global.p1.p0(ptr) - -; CHECK: @bar -define void @bar() { - %t1 = alloca i32 -; CHECK: call ptr addrspace(1) @llvm.nvvm.ptr.gen.to.global.p1.p0(ptr nonnull %t1) -; CHECK-NEXT: store i32 10, ptr %t1 - %t2 = call ptr addrspace(1) @llvm.nvvm.ptr.gen.to.global.p1.p0(ptr %t1) - store i32 10, ptr %t1 - call void @foo(ptr addrspace(1) %t2) - ret void -} - diff --git a/llvm/test/DebugInfo/NVPTX/debug-info.ll b/llvm/test/DebugInfo/NVPTX/debug-info.ll index 9948925db57c9..922a420820f46 100644 --- a/llvm/test/DebugInfo/NVPTX/debug-info.ll +++ b/llvm/test/DebugInfo/NVPTX/debug-info.ll @@ -25,6 +25,10 @@ ; CHECK-DAG: .reg .b64 %rd<8>; ; CHECK: .loc [[DEBUG_INFO_CU:[0-9]+]] 5 0 ; CHECK: ld.param.u32 %r{{.+}}, [{{.+}}]; +; CHECK: ld.param.u64 %rd{{.+}}, [{{.+}}]; +; CHECK: cvta.to.global.u64 %rd{{.+}}, %rd{{.+}}; +; CHECK: ld.param.u64 %rd{{.+}}, [{{.+}}]; +; CHECK: cvta.to.global.u64 %rd{{.+}}, %rd{{.+}}; ; CHECK: .loc [[BUILTUIN_VARS_H:[0-9]+]] 78 180 ; CHECK: mov.u32 %r{{.+}}, %ctaid.x; ; CHECK: .loc [[BUILTUIN_VARS_H]] 89 180 @@ -38,10 +42,6 @@ ; CHECK: .loc [[DEBUG_INFO_CU]] 7 7 ; CHECK: @%p{{.+}} bra [[BB:\$L__.+]]; ; CHECK: ld.param.f32 %f{{.+}}, [{{.+}}]; -; CHECK: ld.param.u64 %rd{{.+}}, [{{.+}}]; -; CHECK: cvta.to.global.u64 %rd{{.+}}, %rd{{.+}}; -; CHECK: ld.param.u64 %rd{{.+}}, [{{.+}}]; -; CHECK: cvta.to.global.u64 %rd{{.+}}, %rd{{.+}}; ; CHECK: .loc [[DEBUG_INFO_CU]] 8 13 ; CHECK: mul.wide.u32 %rd{{.+}}, %r{{.+}}, 4; ; CHECK: add.s64 %rd{{.+}}, %rd{{.+}}, %rd{{.+}}; @@ -2661,22 +2661,22 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b32 4579 // DW_AT_type ; CHECK-NEXT:.b8 25 // Abbrev [25] 0x8aa:0x18 DW_TAG_inlined_subroutine ; CHECK-NEXT:.b32 707 // DW_AT_abstract_origin -; CHECK-NEXT:.b64 $L__tmp0 // DW_AT_low_pc -; CHECK-NEXT:.b64 $L__tmp1 // DW_AT_high_pc +; CHECK-NEXT:.b64 $L__tmp1 // DW_AT_low_pc +; CHECK-NEXT:.b64 $L__tmp2 // DW_AT_high_pc ; CHECK-NEXT:.b8 1 // DW_AT_call_file ; CHECK-NEXT:.b8 6 // DW_AT_call_line ; CHECK-NEXT:.b8 11 // DW_AT_call_column ; CHECK-NEXT:.b8 25 // Abbrev [25] 0x8c2:0x18 DW_TAG_inlined_subroutine ; CHECK-NEXT:.b32 1466 // DW_AT_abstract_origin -; CHECK-NEXT:.b64 $L__tmp1 // DW_AT_low_pc -; CHECK-NEXT:.b64 $L__tmp2 // DW_AT_high_pc +; CHECK-NEXT:.b64 $L__tmp2 // DW_AT_low_pc +; CHECK-NEXT:.b64 $L__tmp3 // DW_AT_high_pc ; CHECK-NEXT:.b8 1 // DW_AT_call_file ; CHECK-NEXT:.b8 6 // DW_AT_call_line ; CHECK-NEXT:.b8 24 // DW_AT_call_column ; CHECK-NEXT:.b8 25 // Abbrev [25] 0x8da:0x18 DW_TAG_inlined_subroutine ; CHECK-NEXT:.b32 2060 // DW_AT_abstract_origin -; CHECK-NEXT:.b64 $L__tmp2 // DW_AT_low_pc -; CHECK-NEXT:.b64 $L__tmp3 // DW_AT_high_pc +; CHECK-NEXT:.b64 $L__tmp3 // DW_AT_low_pc +; CHECK-NEXT:.b64 $L__tmp4 // DW_AT_high_pc ; CHECK-NEXT:.b8 1 // DW_AT_call_file ; CHECK-NEXT:.b8 6 // DW_AT_call_line ; CHECK-NEXT:.b8 37 // DW_AT_call_column From bcbdf7ad6b571d11c102d018c78ee0fbf71e3e2c Mon Sep 17 00:00:00 2001 From: Philip Reames Date: Tue, 24 Sep 2024 08:07:50 -0700 Subject: [PATCH 005/212] [RISCV][TTI/SLP] Add test coverage for select of constants costing Provides coverage for an upcoming change which accounts for the cost of materializing the vector constants in the vector select. 
--- .../Analysis/CostModel/RISCV/rvv-select.ll | 25 +++++++++ .../RISCV/select-profitability.ll | 55 +++++++++++++++++++ 2 files changed, 80 insertions(+) create mode 100644 llvm/test/Transforms/SLPVectorizer/RISCV/select-profitability.ll diff --git a/llvm/test/Analysis/CostModel/RISCV/rvv-select.ll b/llvm/test/Analysis/CostModel/RISCV/rvv-select.ll index 13994c46335de..9eadcaca6bb55 100644 --- a/llvm/test/Analysis/CostModel/RISCV/rvv-select.ll +++ b/llvm/test/Analysis/CostModel/RISCV/rvv-select.ll @@ -390,3 +390,28 @@ define void @select() { ret void } + +define void @select_of_constants() { +; CHECK-LABEL: 'select_of_constants' +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %1 = select i1 undef, <2 x i64> , <2 x i64> zeroinitializer +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %2 = select i1 undef, <2 x i64> , <2 x i64> zeroinitializer +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %3 = select i1 undef, <2 x i64> , <2 x i64> zeroinitializer +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %4 = select i1 undef, <2 x i64> , <2 x i64> +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %5 = select <4 x i1> undef, <4 x i32> , <4 x i32> zeroinitializer +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; + ; Splat constants + select i1 undef, <2 x i64> , <2 x i64> zeroinitializer + ; LHS is a VID patern + select i1 undef, <2 x i64> , <2 x i64> zeroinitializer + select i1 undef, <2 x i64> , <2 x i64> zeroinitializer + ; 2x general (expensive) constants + select i1 undef, <2 x i64> , <2 x i64> + + ; powers of two (still expensive) + select <4 x i1> undef, <4 x i32> , <4 x i32> zeroinitializer + + ret void +} + + diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/select-profitability.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/select-profitability.ll new file mode 100644 index 0000000000000..4496b19fa200c --- /dev/null +++ b/llvm/test/Transforms/SLPVectorizer/RISCV/select-profitability.ll @@ -0,0 +1,55 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 +; RUN: opt -S --passes=slp-vectorizer -mtriple=riscv64-unknown-linux -mattr=+v < %s | FileCheck %s + +define i32 @pow2_zero_constant_shift(i16 zeroext %a, i16 zeroext %b, i16 zeroext %c, i16 zeroext %d) { +; CHECK-LABEL: define i32 @pow2_zero_constant_shift( +; CHECK-SAME: i16 zeroext [[A:%.*]], i16 zeroext [[B:%.*]], i16 zeroext [[C:%.*]], i16 zeroext [[D:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i16> poison, i16 [[A]], i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i16> [[TMP1]], i16 [[B]], i32 1 +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i16> [[TMP2]], i16 [[C]], i32 2 +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i16> [[TMP3]], i16 [[D]], i32 3 +; CHECK-NEXT: [[TMP5:%.*]] = icmp eq <4 x i16> [[TMP4]], +; CHECK-NEXT: [[TMP6:%.*]] = select <4 x i1> [[TMP5]], <4 x i32> , <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP6]]) +; CHECK-NEXT: ret i32 [[TMP7]] +; + %t39.i0 = icmp eq i16 %a, 1 + %t39.i1 = icmp eq i16 %b, 1 + %t39.i2 = icmp eq i16 %c, 1 + %t39.i3 = icmp eq i16 %d, 1 + %t40.i0 = select i1 %t39.i0, i32 65536, i32 0 + %t40.i1 = select i1 %t39.i1, i32 65536, i32 0 + %t40.i2 = select i1 %t39.i2, i32 65536, i32 0 + %t40.i3 = select i1 %t39.i3, i32 65536, i32 0 + %or.rdx0 = or i32 %t40.i0, %t40.i1 + %or.rdx1 = or i32 %t40.i2, 
%t40.i3 + %or.rdx2 = or i32 %or.rdx0, %or.rdx1 + ret i32 %or.rdx2 +} + +; TODO: This case is unprofitable, and we should not be vectorizing this. +define i32 @pow2_zero_variable_shift(i16 zeroext %a, i16 zeroext %b, i16 zeroext %c, i16 zeroext %d) { +; CHECK-LABEL: define i32 @pow2_zero_variable_shift( +; CHECK-SAME: i16 zeroext [[A:%.*]], i16 zeroext [[B:%.*]], i16 zeroext [[C:%.*]], i16 zeroext [[D:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i16> poison, i16 [[A]], i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i16> [[TMP1]], i16 [[B]], i32 1 +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i16> [[TMP2]], i16 [[C]], i32 2 +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i16> [[TMP3]], i16 [[D]], i32 3 +; CHECK-NEXT: [[TMP5:%.*]] = icmp eq <4 x i16> [[TMP4]], +; CHECK-NEXT: [[TMP6:%.*]] = select <4 x i1> [[TMP5]], <4 x i32> , <4 x i32> zeroinitializer +; CHECK-NEXT: [[OR_RDX2:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP6]]) +; CHECK-NEXT: ret i32 [[OR_RDX2]] +; + %t39.i0 = icmp eq i16 %a, 1 + %t39.i1 = icmp eq i16 %b, 1 + %t39.i2 = icmp eq i16 %c, 1 + %t39.i3 = icmp eq i16 %d, 1 + %t40.i0 = select i1 %t39.i0, i32 524288, i32 0 + %t40.i1 = select i1 %t39.i1, i32 262144, i32 0 + %t40.i2 = select i1 %t39.i2, i32 131072, i32 0 + %t40.i3 = select i1 %t39.i3, i32 65536, i32 0 + %or.rdx0 = or i32 %t40.i0, %t40.i1 + %or.rdx1 = or i32 %t40.i2, %t40.i3 + %or.rdx2 = or i32 %or.rdx0, %or.rdx1 + ret i32 %or.rdx2 +} From 31ac400e451b5dcbf11a3ece94d58dc3edf24e14 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Tue, 24 Sep 2024 16:22:02 +0100 Subject: [PATCH 006/212] [LV] Remove another reference of unrolled parts after 57f5d8f2fe. Continue to clean up some now stale references of unroll parts and related terminology as pointed out post-commit for 06c3a7d. --- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index d296511b97799..09e4d0fcd31f3 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -9463,7 +9463,7 @@ void VPReplicateRecipe::execute(VPTransformState &State) { return; } - // Generate scalar instances for all VF lanes of all UF parts. + // Generate scalar instances for all VF lanes. assert(!State.VF.isScalable() && "Can't scalarize a scalable vector"); const unsigned EndLane = State.VF.getKnownMinValue(); for (unsigned Lane = 0; Lane < EndLane; ++Lane) From fa17977c315062646d4d1e01262d68dd69313e61 Mon Sep 17 00:00:00 2001 From: lntue <35648136+lntue@users.noreply.github.com> Date: Tue, 24 Sep 2024 11:26:06 -0400 Subject: [PATCH 007/212] [libc][bazel] Remove specializations from libc_math_function. (#109802) There are no more specializations `libc/src/math/x86_64` or `libc/src/math/aarch64` anymore. All implementations are going through the generic implementation. 
--- .../llvm-project-overlay/libc/BUILD.bazel | 63 +++---------------- .../libc/libc_build_rules.bzl | 11 +--- 2 files changed, 10 insertions(+), 64 deletions(-) diff --git a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel index dbc8c045eea99..140d48c8f9684 100644 --- a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel @@ -1825,26 +1825,11 @@ libc_math_function( ], ) -libc_math_function( - name = "ceil", - specializations = [ - "generic", - ], -) +libc_math_function(name = "ceil") -libc_math_function( - name = "ceilf", - specializations = [ - "generic", - ], -) +libc_math_function(name = "ceilf") -libc_math_function( - name = "ceill", - specializations = [ - "generic", - ], -) +libc_math_function(name = "ceill") libc_math_function(name = "ceilf128") @@ -2126,19 +2111,9 @@ libc_math_function( ], ) -libc_math_function( - name = "floor", - specializations = [ - "generic", - ], -) +libc_math_function(name = "floor") -libc_math_function( - name = "floorf", - specializations = [ - "generic", - ], -) +libc_math_function(name = "floorf") libc_math_function(name = "floorl") @@ -2639,19 +2614,9 @@ libc_math_function(name = "rintl") libc_math_function(name = "rintf128") -libc_math_function( - name = "round", - specializations = [ - "generic", - ], -) +libc_math_function(name = "round") -libc_math_function( - name = "roundf", - specializations = [ - "generic", - ], -) +libc_math_function(name = "roundf") libc_math_function(name = "roundl") @@ -2850,19 +2815,9 @@ libc_math_function(name = "totalordermagl") libc_math_function(name = "totalordermagf128") -libc_math_function( - name = "trunc", - specializations = [ - "generic", - ], -) +libc_math_function(name = "trunc") -libc_math_function( - name = "truncf", - specializations = [ - "generic", - ], -) +libc_math_function(name = "truncf") libc_math_function(name = "truncl") diff --git a/utils/bazel/llvm-project-overlay/libc/libc_build_rules.bzl b/utils/bazel/llvm-project-overlay/libc/libc_build_rules.bzl index ec3714407cb91..f298f817af83d 100644 --- a/utils/bazel/llvm-project-overlay/libc/libc_build_rules.bzl +++ b/utils/bazel/llvm-project-overlay/libc/libc_build_rules.bzl @@ -129,7 +129,6 @@ def libc_function( def libc_math_function( name, - specializations = None, additional_deps = None): """Add a target for a math function. @@ -142,14 +141,6 @@ def libc_math_function( math function. """ additional_deps = additional_deps or [] - specializations = specializations or ["generic"] - select_map = {} - if "generic" in specializations: - select_map["//conditions:default"] = ["src/math/generic/" + name + ".cpp"] - if "aarch64" in specializations: - select_map[PLATFORM_CPU_ARM64] = ["src/math/aarch64/" + name + ".cpp"] - if "x86_64" in specializations: - select_map[PLATFORM_CPU_X86_64] = ["src/math/x86_64/" + name + ".cpp"] #TODO(michaelrj): Fix the floating point dependencies OLD_FPUTIL_DEPS = [ @@ -166,7 +157,7 @@ def libc_math_function( ] libc_function( name = name, - srcs = selects.with_or(select_map), + srcs = ["src/math/generic/" + name + ".cpp"], hdrs = ["src/math/" + name + ".h"], deps = [":__support_common"] + OLD_FPUTIL_DEPS + additional_deps, ) From 99ade15d192db4afa897a7052a9c73dd42c2b88c Mon Sep 17 00:00:00 2001 From: Nico Weber Date: Tue, 24 Sep 2024 11:26:37 -0400 Subject: [PATCH 008/212] Revert "[compiler-rt] prctl interception update, SECCOMP_MODE_FILTER support. (#107722)" This reverts commit b75174d05aa033a382d4c088e96e068a774f46da. 
Does not build on Android, see comments on https://github.com/llvm/llvm-project/pull/107722 --- .../sanitizer_common_interceptors.inc | 6 ----- .../sanitizer_platform_limits_posix.cpp | 24 +++++++++---------- .../sanitizer_platform_limits_posix.h | 3 +-- .../TestCases/Linux/prctl.cpp | 10 -------- 4 files changed, 12 insertions(+), 31 deletions(-) diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc b/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc index e29cc0c8b390a..bf5bb43018efc 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc +++ b/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc @@ -1289,9 +1289,6 @@ INTERCEPTOR(int, prctl, int option, unsigned long arg2, unsigned long arg3, static const int PR_SCHED_CORE = 62; static const int PR_SCHED_CORE_GET = 0; static const int PR_GET_PDEATHSIG = 2; - static const int PR_SET_SECCOMP = 22; - - static const int SECCOMP_MODE_FILTER = 2; if (option == PR_SET_VMA && arg2 == 0UL) { char *name = (char *)arg5; COMMON_INTERCEPTOR_READ_RANGE(ctx, name, internal_strlen(name) + 1); @@ -1310,9 +1307,6 @@ INTERCEPTOR(int, prctl, int option, unsigned long arg2, unsigned long arg3, COMMON_INTERCEPTOR_WRITE_RANGE(ctx, (u64 *)(arg5), sizeof(u64)); } else if (res != -1 && option == PR_GET_PDEATHSIG) { COMMON_INTERCEPTOR_WRITE_RANGE(ctx, (u64 *)(arg2), sizeof(int)); - } else if (res != -1 && option == PR_SET_SECCOMP && - arg2 == SECCOMP_MODE_FILTER) { - COMMON_INTERCEPTOR_WRITE_RANGE(ctx, (u64 *)(arg3), struct_sock_fprog_sz); } return res; } diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_posix.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_posix.cpp index 5eeb2a89efa8c..6d61d276d77e3 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_posix.cpp +++ b/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_posix.cpp @@ -117,16 +117,15 @@ typedef struct user_fpregs elf_fpregset_t; #if SANITIZER_LINUX #if SANITIZER_GLIBC #include -# include -# include -# include -# include -# include -# include -# if HAVE_RPC_XDR_H -# include -# endif -# include +#include +#include +#include +#include +#include +#if HAVE_RPC_XDR_H +# include +#endif +#include #else #include #include @@ -532,10 +531,9 @@ unsigned struct_ElfW_Phdr_sz = sizeof(Elf_Phdr); unsigned struct_audio_buf_info_sz = sizeof(struct audio_buf_info); unsigned struct_ppp_stats_sz = sizeof(struct ppp_stats); - unsigned struct_sock_fprog_sz = sizeof(struct sock_fprog); -# endif // SANITIZER_GLIBC +#endif // SANITIZER_GLIBC -# if !SANITIZER_ANDROID && !SANITIZER_APPLE +#if !SANITIZER_ANDROID && !SANITIZER_APPLE unsigned struct_sioc_sg_req_sz = sizeof(struct sioc_sg_req); unsigned struct_sioc_vif_req_sz = sizeof(struct sioc_vif_req); #endif diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_posix.h b/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_posix.h index ca03841ccc198..34bfef1f7ef45 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_posix.h +++ b/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_posix.h @@ -1050,8 +1050,7 @@ extern unsigned struct_serial_struct_sz; extern unsigned struct_sockaddr_ax25_sz; extern unsigned struct_unimapdesc_sz; extern unsigned struct_unimapinit_sz; -extern unsigned struct_sock_fprog_sz; -# endif // SANITIZER_LINUX && !SANITIZER_ANDROID +#endif // SANITIZER_LINUX && !SANITIZER_ANDROID extern const unsigned long __sanitizer_bufsiz; diff --git 
a/compiler-rt/test/sanitizer_common/TestCases/Linux/prctl.cpp b/compiler-rt/test/sanitizer_common/TestCases/Linux/prctl.cpp index dab1d1b48f868..cbff02d66efa7 100644 --- a/compiler-rt/test/sanitizer_common/TestCases/Linux/prctl.cpp +++ b/compiler-rt/test/sanitizer_common/TestCases/Linux/prctl.cpp @@ -4,8 +4,6 @@ #include #include -#include -#include #include #include #include @@ -80,13 +78,5 @@ int main() { } } - sock_filter f[] = {{.code = (BPF_LD | BPF_W | BPF_ABS), - .k = (uint32_t)(SKF_AD_OFF | SKF_AD_CPU)}, - {.code = (BPF_RET | BPF_A), .k = 0}}; - sock_fprog pr = {.len = 2, .filter = f}; - - res = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &pr); - assert(res == -1); - return 0; } From fda01437af339c87ecd226596dd1b5f6d2c6cbfa Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Tue, 24 Sep 2024 08:33:24 -0700 Subject: [PATCH 009/212] [Rewrite] Use SmallSetVector (NFC) (#109746) We can combine: SmallVector BlockByCopyDecls; llvm::SmallPtrSet BlockByCopyDeclsPtrSet; into: llvm::SmallSetVector BlockByCopyDecls; Likewise, we can combine: SmallVector BlockByRefDecls; llvm::SmallPtrSet BlockByRefDeclsPtrSet; into: llvm::SmallSetVector BlockByRefDecls; --- clang/lib/Frontend/Rewrite/RewriteObjC.cpp | 50 ++++++---------------- 1 file changed, 14 insertions(+), 36 deletions(-) diff --git a/clang/lib/Frontend/Rewrite/RewriteObjC.cpp b/clang/lib/Frontend/Rewrite/RewriteObjC.cpp index a1e792bf772ba..f49ccf7be68e2 100644 --- a/clang/lib/Frontend/Rewrite/RewriteObjC.cpp +++ b/clang/lib/Frontend/Rewrite/RewriteObjC.cpp @@ -128,10 +128,8 @@ namespace { SmallVector BlockDeclRefs; // Block related declarations. - SmallVector BlockByCopyDecls; - llvm::SmallPtrSet BlockByCopyDeclsPtrSet; - SmallVector BlockByRefDecls; - llvm::SmallPtrSet BlockByRefDeclsPtrSet; + llvm::SmallSetVector BlockByCopyDecls; + llvm::SmallSetVector BlockByRefDecls; llvm::DenseMap BlockByRefDeclNo; llvm::SmallPtrSet ImportedBlockDecls; llvm::SmallPtrSet ImportedLocalExternalDecls; @@ -3357,7 +3355,7 @@ std::string RewriteObjC::SynthesizeBlockHelperFuncs(BlockExpr *CE, int i, S += VD->getNameAsString(); S += ", (void*)src->"; S += VD->getNameAsString(); - if (BlockByRefDeclsPtrSet.count(VD)) + if (BlockByRefDecls.contains(VD)) S += ", " + utostr(BLOCK_FIELD_IS_BYREF) + "/*BLOCK_FIELD_IS_BYREF*/);"; else if (VD->getType()->isBlockPointerType()) S += ", " + utostr(BLOCK_FIELD_IS_BLOCK) + "/*BLOCK_FIELD_IS_BLOCK*/);"; @@ -3374,7 +3372,7 @@ std::string RewriteObjC::SynthesizeBlockHelperFuncs(BlockExpr *CE, int i, for (ValueDecl *VD : ImportedBlockDecls) { S += "_Block_object_dispose((void*)src->"; S += VD->getNameAsString(); - if (BlockByRefDeclsPtrSet.count(VD)) + if (BlockByRefDecls.contains(VD)) S += ", " + utostr(BLOCK_FIELD_IS_BYREF) + "/*BLOCK_FIELD_IS_BYREF*/);"; else if (VD->getType()->isBlockPointerType()) S += ", " + utostr(BLOCK_FIELD_IS_BLOCK) + "/*BLOCK_FIELD_IS_BLOCK*/);"; @@ -3553,14 +3551,10 @@ void RewriteObjC::SynthesizeBlockLiterals(SourceLocation FunLocStart, DeclRefExpr *Exp = InnerDeclRefs[count++]; ValueDecl *VD = Exp->getDecl(); BlockDeclRefs.push_back(Exp); - if (!VD->hasAttr() && !BlockByCopyDeclsPtrSet.count(VD)) { - BlockByCopyDeclsPtrSet.insert(VD); - BlockByCopyDecls.push_back(VD); - } - if (VD->hasAttr() && !BlockByRefDeclsPtrSet.count(VD)) { - BlockByRefDeclsPtrSet.insert(VD); - BlockByRefDecls.push_back(VD); - } + if (VD->hasAttr()) + BlockByRefDecls.insert(VD); + else + BlockByCopyDecls.insert(VD); // imported objects in the inner blocks not used in the outer // blocks must be copied/disposed in the outer 
block as well. if (VD->hasAttr() || @@ -3590,9 +3584,7 @@ void RewriteObjC::SynthesizeBlockLiterals(SourceLocation FunLocStart, BlockDeclRefs.clear(); BlockByRefDecls.clear(); - BlockByRefDeclsPtrSet.clear(); BlockByCopyDecls.clear(); - BlockByCopyDeclsPtrSet.clear(); ImportedBlockDecls.clear(); } if (RewriteSC) { @@ -4314,20 +4306,12 @@ void RewriteObjC::CollectBlockDeclRefInfo(BlockExpr *Exp) { if (BlockDeclRefs.size()) { // Unique all "by copy" declarations. for (unsigned i = 0; i < BlockDeclRefs.size(); i++) - if (!BlockDeclRefs[i]->getDecl()->hasAttr()) { - if (!BlockByCopyDeclsPtrSet.count(BlockDeclRefs[i]->getDecl())) { - BlockByCopyDeclsPtrSet.insert(BlockDeclRefs[i]->getDecl()); - BlockByCopyDecls.push_back(BlockDeclRefs[i]->getDecl()); - } - } + if (!BlockDeclRefs[i]->getDecl()->hasAttr()) + BlockByCopyDecls.insert(BlockDeclRefs[i]->getDecl()); // Unique all "by ref" declarations. for (unsigned i = 0; i < BlockDeclRefs.size(); i++) - if (BlockDeclRefs[i]->getDecl()->hasAttr()) { - if (!BlockByRefDeclsPtrSet.count(BlockDeclRefs[i]->getDecl())) { - BlockByRefDeclsPtrSet.insert(BlockDeclRefs[i]->getDecl()); - BlockByRefDecls.push_back(BlockDeclRefs[i]->getDecl()); - } - } + if (BlockDeclRefs[i]->getDecl()->hasAttr()) + BlockByRefDecls.insert(BlockDeclRefs[i]->getDecl()); // Find any imported blocks...they will need special attention. for (unsigned i = 0; i < BlockDeclRefs.size(); i++) if (BlockDeclRefs[i]->getDecl()->hasAttr() || @@ -4358,22 +4342,18 @@ Stmt *RewriteObjC::SynthBlockInitExpr(BlockExpr *Exp, for (unsigned i = 0; i < InnerBlockDeclRefs.size(); i++) { DeclRefExpr *Exp = InnerBlockDeclRefs[i]; ValueDecl *VD = Exp->getDecl(); - if (!VD->hasAttr() && - BlockByCopyDeclsPtrSet.insert(VD).second) { + if (!VD->hasAttr() && BlockByCopyDecls.insert(VD)) { // We need to save the copied-in variables in nested // blocks because it is needed at the end for some of the API // generations. See SynthesizeBlockLiterals routine. InnerDeclRefs.push_back(Exp); countOfInnerDecls++; BlockDeclRefs.push_back(Exp); - BlockByCopyDecls.push_back(VD); } - if (VD->hasAttr() && - BlockByRefDeclsPtrSet.insert(VD).second) { + if (VD->hasAttr() && BlockByRefDecls.insert(VD)) { InnerDeclRefs.push_back(Exp); countOfInnerDecls++; BlockDeclRefs.push_back(Exp); - BlockByRefDecls.push_back(VD); } } // Find any imported blocks...they will need special attention. @@ -4534,9 +4514,7 @@ Stmt *RewriteObjC::SynthBlockInitExpr(BlockExpr *Exp, NewRep); BlockDeclRefs.clear(); BlockByRefDecls.clear(); - BlockByRefDeclsPtrSet.clear(); BlockByCopyDecls.clear(); - BlockByCopyDeclsPtrSet.clear(); ImportedBlockDecls.clear(); return NewRep; } From 36dce5091e384087f5d1ceaf3777ac14f41087e4 Mon Sep 17 00:00:00 2001 From: spupyrev Date: Tue, 24 Sep 2024 08:36:57 -0700 Subject: [PATCH 010/212] [CodeLayout][NFC] Format and minor refactoring of MBP (#109729) This PR has two (NFC) commits: - clang-format MBP - move a part of tail duplication and block aligning into helper functions for better readability. 
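As a sketch of the second item: the `initTailDupThreshold` declaration and the new `PassConfig`/`TailDupSize` members are visible in the diff below, but the body here is an assumption about the setup it consolidates out of `runOnMachineFunction`, not the verbatim implementation:

```cpp
void MachineBlockPlacement::initTailDupThreshold() {
  DupThreshold = BlockFrequency(0);
  // ... profile-based DupThreshold computation elided ...

  TailDupSize = TailDupPlacementThreshold;
  // At -O3, assume the aggressive cutoff applies unless only the regular
  // threshold was set explicitly on the command line.
  if (PassConfig->getOptLevel() >= CodeGenOptLevel::Aggressive &&
      (TailDupPlacementThreshold.getNumOccurrences() == 0 ||
       TailDupPlacementAggressiveThreshold.getNumOccurrences() != 0))
    TailDupSize = TailDupPlacementAggressiveThreshold;
}
```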
--- llvm/lib/CodeGen/MachineBlockPlacement.cpp | 555 ++++++++++----------- 1 file changed, 274 insertions(+), 281 deletions(-) diff --git a/llvm/lib/CodeGen/MachineBlockPlacement.cpp b/llvm/lib/CodeGen/MachineBlockPlacement.cpp index be783bc4e2973..7dcb287b38734 100644 --- a/llvm/lib/CodeGen/MachineBlockPlacement.cpp +++ b/llvm/lib/CodeGen/MachineBlockPlacement.cpp @@ -119,10 +119,10 @@ static cl::opt LoopToColdBlockRatio( "(frequency of block) is greater than this ratio"), cl::init(5), cl::Hidden); -static cl::opt ForceLoopColdBlock( - "force-loop-cold-block", - cl::desc("Force outlining cold blocks from loops."), - cl::init(false), cl::Hidden); +static cl::opt + ForceLoopColdBlock("force-loop-cold-block", + cl::desc("Force outlining cold blocks from loops."), + cl::init(false), cl::Hidden); static cl::opt PreciseRotationCost("precise-rotation-cost", @@ -147,43 +147,43 @@ static cl::opt JumpInstCost("jump-inst-cost", cl::desc("Cost of jump instructions."), cl::init(1), cl::Hidden); static cl::opt -TailDupPlacement("tail-dup-placement", - cl::desc("Perform tail duplication during placement. " - "Creates more fallthrough opportunites in " - "outline branches."), - cl::init(true), cl::Hidden); + TailDupPlacement("tail-dup-placement", + cl::desc("Perform tail duplication during placement. " + "Creates more fallthrough opportunites in " + "outline branches."), + cl::init(true), cl::Hidden); static cl::opt -BranchFoldPlacement("branch-fold-placement", - cl::desc("Perform branch folding during placement. " - "Reduces code size."), - cl::init(true), cl::Hidden); + BranchFoldPlacement("branch-fold-placement", + cl::desc("Perform branch folding during placement. " + "Reduces code size."), + cl::init(true), cl::Hidden); // Heuristic for tail duplication. static cl::opt TailDupPlacementThreshold( "tail-dup-placement-threshold", cl::desc("Instruction cutoff for tail duplication during layout. " "Tail merging during layout is forced to have a threshold " - "that won't conflict."), cl::init(2), - cl::Hidden); + "that won't conflict."), + cl::init(2), cl::Hidden); // Heuristic for aggressive tail duplication. static cl::opt TailDupPlacementAggressiveThreshold( "tail-dup-placement-aggressive-threshold", cl::desc("Instruction cutoff for aggressive tail duplication during " "layout. Used at -O3. Tail merging during layout is forced to " - "have a threshold that won't conflict."), cl::init(4), - cl::Hidden); + "have a threshold that won't conflict."), + cl::init(4), cl::Hidden); // Heuristic for tail duplication. static cl::opt TailDupPlacementPenalty( "tail-dup-placement-penalty", - cl::desc("Cost penalty for blocks that can avoid breaking CFG by copying. " - "Copying can increase fallthrough, but it also increases icache " - "pressure. This parameter controls the penalty to account for that. " - "Percent as integer."), - cl::init(2), - cl::Hidden); + cl::desc( + "Cost penalty for blocks that can avoid breaking CFG by copying. " + "Copying can increase fallthrough, but it also increases icache " + "pressure. This parameter controls the penalty to account for that. " + "Percent as integer."), + cl::init(2), cl::Hidden); // Heuristic for tail duplication if profile count is used in cost model. static cl::opt TailDupProfilePercentThreshold( @@ -198,8 +198,7 @@ static cl::opt TriangleChainCount( "triangle-chain-count", cl::desc("Number of triangle-shaped-CFG's that need to be in a row for the " "triangle tail duplication heuristic to kick in. 
0 to disable."), - cl::init(2), - cl::Hidden); + cl::init(2), cl::Hidden); // Use case: When block layout is visualized after MBP pass, the basic blocks // are labeled in layout order; meanwhile blocks could be numbered in a @@ -292,8 +291,8 @@ class BlockChain { iterator end() { return Blocks.end(); } const_iterator end() const { return Blocks.end(); } - bool remove(MachineBasicBlock* BB) { - for(iterator i = begin(); i != end(); ++i) { + bool remove(MachineBasicBlock *BB) { + for (iterator i = begin(); i != end(); ++i) { if (*i == BB) { Blocks.erase(i); return true; @@ -405,6 +404,8 @@ class MachineBlockPlacement : public MachineFunctionPass { ProfileSummaryInfo *PSI = nullptr; + TargetPassConfig *PassConfig = nullptr; + /// Duplicator used to duplicate tails during placement. /// /// Placement decisions can open up new tail duplication opportunities, but @@ -415,6 +416,8 @@ class MachineBlockPlacement : public MachineFunctionPass { /// Partial tail duplication threshold. BlockFrequency DupThreshold; + unsigned TailDupSize; + /// True: use block profile count to compute tail duplication cost. /// False: use block frequency to compute tail duplication cost. bool UseProfileCount = false; @@ -459,26 +462,24 @@ class MachineBlockPlacement : public MachineFunctionPass { /// Scale the DupThreshold according to basic block size. BlockFrequency scaleThreshold(MachineBasicBlock *BB); - void initDupThreshold(); + void initTailDupThreshold(); /// Decrease the UnscheduledPredecessors count for all blocks in chain, and /// if the count goes to 0, add them to the appropriate work list. - void markChainSuccessors( - const BlockChain &Chain, const MachineBasicBlock *LoopHeaderBB, - const BlockFilterSet *BlockFilter = nullptr); + void markChainSuccessors(const BlockChain &Chain, + const MachineBasicBlock *LoopHeaderBB, + const BlockFilterSet *BlockFilter = nullptr); /// Decrease the UnscheduledPredecessors count for a single block, and /// if the count goes to 0, add them to the appropriate work list. 
- void markBlockSuccessors( - const BlockChain &Chain, const MachineBasicBlock *BB, - const MachineBasicBlock *LoopHeaderBB, - const BlockFilterSet *BlockFilter = nullptr); + void markBlockSuccessors(const BlockChain &Chain, const MachineBasicBlock *BB, + const MachineBasicBlock *LoopHeaderBB, + const BlockFilterSet *BlockFilter = nullptr); BranchProbability - collectViableSuccessors( - const MachineBasicBlock *BB, const BlockChain &Chain, - const BlockFilterSet *BlockFilter, - SmallVector &Successors); + collectViableSuccessors(const MachineBasicBlock *BB, const BlockChain &Chain, + const BlockFilterSet *BlockFilter, + SmallVector &Successors); bool isBestSuccessor(MachineBasicBlock *BB, MachineBasicBlock *Pred, BlockFilterSet *BlockFilter); void findDuplicateCandidates(SmallVectorImpl &Candidates, @@ -496,16 +497,19 @@ class MachineBlockPlacement : public MachineFunctionPass { MachineFunction::iterator &PrevUnplacedBlockIt, BlockFilterSet::iterator &PrevUnplacedBlockInFilterIt, bool &DuplicatedToLPred); - bool hasBetterLayoutPredecessor( - const MachineBasicBlock *BB, const MachineBasicBlock *Succ, - const BlockChain &SuccChain, BranchProbability SuccProb, - BranchProbability RealSuccProb, const BlockChain &Chain, - const BlockFilterSet *BlockFilter); - BlockAndTailDupResult selectBestSuccessor( - const MachineBasicBlock *BB, const BlockChain &Chain, - const BlockFilterSet *BlockFilter); - MachineBasicBlock *selectBestCandidateBlock( - const BlockChain &Chain, SmallVectorImpl &WorkList); + bool hasBetterLayoutPredecessor(const MachineBasicBlock *BB, + const MachineBasicBlock *Succ, + const BlockChain &SuccChain, + BranchProbability SuccProb, + BranchProbability RealSuccProb, + const BlockChain &Chain, + const BlockFilterSet *BlockFilter); + BlockAndTailDupResult selectBestSuccessor(const MachineBasicBlock *BB, + const BlockChain &Chain, + const BlockFilterSet *BlockFilter); + MachineBasicBlock * + selectBestCandidateBlock(const BlockChain &Chain, + SmallVectorImpl &WorkList); MachineBasicBlock * getFirstUnplacedBlock(const BlockChain &PlacedChain, MachineFunction::iterator &PrevUnplacedBlockIt); @@ -536,20 +540,19 @@ class MachineBlockPlacement : public MachineFunctionPass { const MachineBasicBlock *ExitBB, const BlockFilterSet &LoopBlockSet); MachineBasicBlock *findBestLoopTopHelper(MachineBasicBlock *OldTop, - const MachineLoop &L, const BlockFilterSet &LoopBlockSet); - MachineBasicBlock *findBestLoopTop( - const MachineLoop &L, const BlockFilterSet &LoopBlockSet); - MachineBasicBlock *findBestLoopExit( - const MachineLoop &L, const BlockFilterSet &LoopBlockSet, - BlockFrequency &ExitFreq); + const MachineLoop &L, + const BlockFilterSet &LoopBlockSet); + MachineBasicBlock *findBestLoopTop(const MachineLoop &L, + const BlockFilterSet &LoopBlockSet); + MachineBasicBlock *findBestLoopExit(const MachineLoop &L, + const BlockFilterSet &LoopBlockSet, + BlockFrequency &ExitFreq); BlockFilterSet collectLoopBlockSet(const MachineLoop &L); void buildLoopChains(const MachineLoop &L); - void rotateLoop( - BlockChain &LoopChain, const MachineBasicBlock *ExitingBB, - BlockFrequency ExitFreq, const BlockFilterSet &LoopBlockSet); - void rotateLoopWithProfile( - BlockChain &LoopChain, const MachineLoop &L, - const BlockFilterSet &LoopBlockSet); + void rotateLoop(BlockChain &LoopChain, const MachineBasicBlock *ExitingBB, + BlockFrequency ExitFreq, const BlockFilterSet &LoopBlockSet); + void rotateLoopWithProfile(BlockChain &LoopChain, const MachineLoop &L, + const BlockFilterSet &LoopBlockSet); void 
buildCFGChains(); void optimizeBranches(); void alignBlocks(); @@ -558,10 +561,10 @@ class MachineBlockPlacement : public MachineFunctionPass { bool shouldTailDuplicate(MachineBasicBlock *BB); /// Check the edge frequencies to see if tail duplication will increase /// fallthroughs. - bool isProfitableToTailDup( - const MachineBasicBlock *BB, const MachineBasicBlock *Succ, - BranchProbability QProb, - const BlockChain &Chain, const BlockFilterSet *BlockFilter); + bool isProfitableToTailDup(const MachineBasicBlock *BB, + const MachineBasicBlock *Succ, + BranchProbability QProb, const BlockChain &Chain, + const BlockFilterSet *BlockFilter); /// Check for a trellis layout. bool isTrellis(const MachineBasicBlock *BB, @@ -582,9 +585,10 @@ class MachineBlockPlacement : public MachineFunctionPass { /// Returns true if a block can tail duplicate into all unplaced /// predecessors. Filters based on loop. - bool canTailDuplicateUnplacedPreds( - const MachineBasicBlock *BB, MachineBasicBlock *Succ, - const BlockChain &Chain, const BlockFilterSet *BlockFilter); + bool canTailDuplicateUnplacedPreds(const MachineBasicBlock *BB, + MachineBasicBlock *Succ, + const BlockChain &Chain, + const BlockFilterSet *BlockFilter); /// Find chains of triangles to tail-duplicate where a global analysis works, /// but a local analysis would not find them. @@ -802,8 +806,8 @@ bool MachineBlockPlacement::shouldTailDuplicate(MachineBasicBlock *BB) { /// Compare 2 BlockFrequency's with a small penalty for \p A. /// In order to be conservative, we apply a X% penalty to account for /// increased icache pressure and static heuristics. For small frequencies -/// we use only the numerators to improve accuracy. For simplicity, we assume the -/// penalty is less than 100% +/// we use only the numerators to improve accuracy. For simplicity, we assume +/// the penalty is less than 100% /// TODO(iteratee): Use 64-bit fixed point edge frequencies everywhere. static bool greaterWithBias(BlockFrequency A, BlockFrequency B, BlockFrequency EntryFreq) { @@ -819,8 +823,8 @@ static bool greaterWithBias(BlockFrequency A, BlockFrequency B, /// considering duplication. bool MachineBlockPlacement::isProfitableToTailDup( const MachineBasicBlock *BB, const MachineBasicBlock *Succ, - BranchProbability QProb, - const BlockChain &Chain, const BlockFilterSet *BlockFilter) { + BranchProbability QProb, const BlockChain &Chain, + const BlockFilterSet *BlockFilter) { // We need to do a probability calculation to make sure this is profitable. // First: does succ have a successor that post-dominates? This affects the // calculation. The 2 relevant cases are: @@ -876,12 +880,12 @@ bool MachineBlockPlacement::isProfitableToTailDup( // from BB. auto SuccBestPred = BlockFrequency(0); for (MachineBasicBlock *SuccPred : Succ->predecessors()) { - if (SuccPred == Succ || SuccPred == BB - || BlockToChain[SuccPred] == &Chain - || (BlockFilter && !BlockFilter->count(SuccPred))) + if (SuccPred == Succ || SuccPred == BB || + BlockToChain[SuccPred] == &Chain || + (BlockFilter && !BlockFilter->count(SuccPred))) continue; - auto Freq = MBFI->getBlockFreq(SuccPred) - * MBPI->getEdgeProbability(SuccPred, Succ); + auto Freq = + MBFI->getBlockFreq(SuccPred) * MBPI->getEdgeProbability(SuccPred, Succ); if (Freq > SuccBestPred) SuccBestPred = Freq; } @@ -1137,7 +1141,7 @@ MachineBlockPlacement::getBestTrellisSuccessor( } // We have already computed the optimal edge for the other side of the // trellis. 
- ComputedEdges[BestB.Src] = { BestB.Dest, false }; + ComputedEdges[BestB.Src] = {BestB.Dest, false}; auto TrellisSucc = BestA.Dest; LLVM_DEBUG(BranchProbability SuccProb = getAdjustedProbability( @@ -1169,8 +1173,8 @@ bool MachineBlockPlacement::canTailDuplicateUnplacedPreds( // Make sure all unplaced and unfiltered predecessors can be // tail-duplicated into. // Skip any blocks that are already placed or not in this loop. - if (Pred == BB || (BlockFilter && !BlockFilter->count(Pred)) - || (BlockToChain[Pred] == &Chain && !Succ->succ_empty())) + if (Pred == BB || (BlockFilter && !BlockFilter->count(Pred)) || + (BlockToChain[Pred] == &Chain && !Succ->succ_empty())) continue; if (!TailDup.canTailDuplicate(Succ, Pred)) { if (Successors.size() > 1 && hasSameSuccessors(*Pred, Successors)) @@ -1289,9 +1293,7 @@ void MachineBlockPlacement::precomputeTriangleChains() { unsigned count() const { return Edges.size() - 1; } - MachineBasicBlock *getKey() const { - return Edges.back(); - } + MachineBasicBlock *getKey() const { return Edges.back(); } }; if (TriangleChainCount == 0) @@ -1326,7 +1328,7 @@ void MachineBlockPlacement::precomputeTriangleChains() { bool CanTailDuplicate = true; // If PDom can't tail-duplicate into it's non-BB predecessors, then this // isn't the kind of triangle we're looking for. - for (MachineBasicBlock* Pred : PDom->predecessors()) { + for (MachineBasicBlock *Pred : PDom->predecessors()) { if (Pred == &BB) continue; if (!TailDup.canTailDuplicate(PDom, Pred)) { @@ -1386,8 +1388,8 @@ void MachineBlockPlacement::precomputeTriangleChains() { // When profile is not present, return the StaticLikelyProb. // When profile is available, we need to handle the triangle-shape CFG. -static BranchProbability getLayoutSuccessorProbThreshold( - const MachineBasicBlock *BB) { +static BranchProbability +getLayoutSuccessorProbThreshold(const MachineBasicBlock *BB) { if (!BB->getParent()->getFunction().hasProfileData()) return BranchProbability(StaticLikelyProb, 100); if (BB->succ_size() == 2) { @@ -1551,8 +1553,8 @@ bool MachineBlockPlacement::hasBetterLayoutPredecessor( for (MachineBasicBlock *Pred : Succ->predecessors()) { BlockChain *PredChain = BlockToChain[Pred]; if (Pred == Succ || PredChain == &SuccChain || - (BlockFilter && !BlockFilter->count(Pred)) || - PredChain == &Chain || Pred != *std::prev(PredChain->end()) || + (BlockFilter && !BlockFilter->count(Pred)) || PredChain == &Chain || + Pred != *std::prev(PredChain->end()) || // This check is redundant except for look ahead. This function is // called for lookahead by isProfitableToTailDup when BB hasn't been // placed yet. @@ -1599,12 +1601,12 @@ bool MachineBlockPlacement::hasBetterLayoutPredecessor( /// \returns The best successor block found, or null if none are viable, along /// with a boolean indicating if tail duplication is necessary. 
MachineBlockPlacement::BlockAndTailDupResult -MachineBlockPlacement::selectBestSuccessor( - const MachineBasicBlock *BB, const BlockChain &Chain, - const BlockFilterSet *BlockFilter) { +MachineBlockPlacement::selectBestSuccessor(const MachineBasicBlock *BB, + const BlockChain &Chain, + const BlockFilterSet *BlockFilter) { const BranchProbability HotProb(StaticLikelyProb, 100); - BlockAndTailDupResult BestSucc = { nullptr, false }; + BlockAndTailDupResult BestSucc = {nullptr, false}; auto BestProb = BranchProbability::getZero(); SmallVector Successors; @@ -1684,8 +1686,8 @@ MachineBlockPlacement::selectBestSuccessor( std::tie(DupProb, Succ) = Tup; if (DupProb < BestProb) break; - if (canTailDuplicateUnplacedPreds(BB, Succ, Chain, BlockFilter) - && (isProfitableToTailDup(BB, Succ, BestProb, Chain, BlockFilter))) { + if (canTailDuplicateUnplacedPreds(BB, Succ, Chain, BlockFilter) && + (isProfitableToTailDup(BB, Succ, BestProb, Chain, BlockFilter))) { LLVM_DEBUG(dbgs() << " Candidate: " << getBlockName(Succ) << ", probability: " << DupProb << " (Tail Duplicate)\n"); @@ -1822,8 +1824,7 @@ MachineBasicBlock *MachineBlockPlacement::getFirstUnplacedBlock( } void MachineBlockPlacement::fillWorkLists( - const MachineBasicBlock *MBB, - SmallPtrSetImpl &UpdatedPreds, + const MachineBasicBlock *MBB, SmallPtrSetImpl &UpdatedPreds, const BlockFilterSet *BlockFilter = nullptr) { BlockChain &Chain = *BlockToChain[MBB]; if (!UpdatedPreds.insert(&Chain).second) @@ -1854,9 +1855,9 @@ void MachineBlockPlacement::fillWorkLists( BlockWorkList.push_back(BB); } -void MachineBlockPlacement::buildChain( - const MachineBasicBlock *HeadBB, BlockChain &Chain, - BlockFilterSet *BlockFilter) { +void MachineBlockPlacement::buildChain(const MachineBasicBlock *HeadBB, + BlockChain &Chain, + BlockFilterSet *BlockFilter) { assert(HeadBB && "BB must not be null.\n"); assert(BlockToChain[HeadBB] == &Chain && "BlockToChainMap mis-match.\n"); MachineFunction::iterator PrevUnplacedBlockIt = F->begin(); @@ -1872,16 +1873,14 @@ void MachineBlockPlacement::buildChain( assert(BlockToChain[BB] == &Chain && "BlockToChainMap mis-match in loop."); assert(*std::prev(Chain.end()) == BB && "BB Not found at end of chain."); - // Look for the best viable successor if there is one to place immediately // after this block. auto Result = selectBestSuccessor(BB, Chain, BlockFilter); - MachineBasicBlock* BestSucc = Result.BB; + MachineBasicBlock *BestSucc = Result.BB; bool ShouldTailDup = Result.ShouldTailDup; if (allowTailDupPlacement()) - ShouldTailDup |= (BestSucc && canTailDuplicateUnplacedPreds(BB, BestSucc, - Chain, - BlockFilter)); + ShouldTailDup |= (BestSucc && canTailDuplicateUnplacedPreds( + BB, BestSucc, Chain, BlockFilter)); // If an immediate successor isn't available, look for the best viable // block among those we've identified as not violating the loop's CFG at @@ -1918,8 +1917,8 @@ void MachineBlockPlacement::buildChain( // Place this block, updating the datastructures to reflect its placement. BlockChain &SuccChain = *BlockToChain[BestSucc]; - // Zero out UnscheduledPredecessors for the successor we're about to merge in case - // we selected a successor that didn't fit naturally into the CFG. + // Zero out UnscheduledPredecessors for the successor we're about to merge + // in case we selected a successor that didn't fit naturally into the CFG. 
SuccChain.UnscheduledPredecessors = 0; LLVM_DEBUG(dbgs() << "Merging from " << getBlockName(BB) << " to " << getBlockName(BestSucc) << "\n"); @@ -1946,10 +1945,8 @@ void MachineBlockPlacement::buildChain( // If BB is moved before OldTop, Pred needs a taken branch to BB, and it can't // layout the other successor below it, so it can't reduce taken branch. // In this case we keep its original layout. -bool -MachineBlockPlacement::canMoveBottomBlockToTop( - const MachineBasicBlock *BottomBlock, - const MachineBasicBlock *OldTop) { +bool MachineBlockPlacement::canMoveBottomBlockToTop( + const MachineBasicBlock *BottomBlock, const MachineBasicBlock *OldTop) { if (BottomBlock->pred_size() != 1) return true; MachineBasicBlock *Pred = *BottomBlock->pred_begin(); @@ -1967,9 +1964,8 @@ MachineBlockPlacement::canMoveBottomBlockToTop( // Find out the possible fall through frequence to the top of a loop. BlockFrequency -MachineBlockPlacement::TopFallThroughFreq( - const MachineBasicBlock *Top, - const BlockFilterSet &LoopBlockSet) { +MachineBlockPlacement::TopFallThroughFreq(const MachineBasicBlock *Top, + const BlockFilterSet &LoopBlockSet) { BlockFrequency MaxFreq = BlockFrequency(0); for (MachineBasicBlock *Pred : Top->predecessors()) { BlockChain *PredChain = BlockToChain[Pred]; @@ -1991,8 +1987,8 @@ MachineBlockPlacement::TopFallThroughFreq( } } if (TopOK) { - BlockFrequency EdgeFreq = MBFI->getBlockFreq(Pred) * - MBPI->getEdgeProbability(Pred, Top); + BlockFrequency EdgeFreq = + MBFI->getBlockFreq(Pred) * MBPI->getEdgeProbability(Pred, Top); if (EdgeFreq > MaxFreq) MaxFreq = EdgeFreq; } @@ -2022,19 +2018,16 @@ MachineBlockPlacement::TopFallThroughFreq( // |- // V // -BlockFrequency -MachineBlockPlacement::FallThroughGains( - const MachineBasicBlock *NewTop, - const MachineBasicBlock *OldTop, - const MachineBasicBlock *ExitBB, - const BlockFilterSet &LoopBlockSet) { +BlockFrequency MachineBlockPlacement::FallThroughGains( + const MachineBasicBlock *NewTop, const MachineBasicBlock *OldTop, + const MachineBasicBlock *ExitBB, const BlockFilterSet &LoopBlockSet) { BlockFrequency FallThrough2Top = TopFallThroughFreq(OldTop, LoopBlockSet); BlockFrequency FallThrough2Exit = BlockFrequency(0); if (ExitBB) - FallThrough2Exit = MBFI->getBlockFreq(NewTop) * - MBPI->getEdgeProbability(NewTop, ExitBB); - BlockFrequency BackEdgeFreq = MBFI->getBlockFreq(NewTop) * - MBPI->getEdgeProbability(NewTop, OldTop); + FallThrough2Exit = + MBFI->getBlockFreq(NewTop) * MBPI->getEdgeProbability(NewTop, ExitBB); + BlockFrequency BackEdgeFreq = + MBFI->getBlockFreq(NewTop) * MBPI->getEdgeProbability(NewTop, OldTop); // Find the best Pred of NewTop. MachineBasicBlock *BestPred = nullptr; @@ -2113,10 +2106,8 @@ MachineBlockPlacement::FallThroughGains( /// At the same time, move it before old top increases the taken branch /// to loop exit block, so the reduced taken branch will be compared with /// the increased taken branch to the loop exit block. -MachineBasicBlock * -MachineBlockPlacement::findBestLoopTopHelper( - MachineBasicBlock *OldTop, - const MachineLoop &L, +MachineBasicBlock *MachineBlockPlacement::findBestLoopTopHelper( + MachineBasicBlock *OldTop, const MachineLoop &L, const BlockFilterSet &LoopBlockSet) { // Check that the header hasn't been fused with a preheader block due to // crazy branches. 
If it has, we need to start with the header at the top to @@ -2153,8 +2144,8 @@ MachineBlockPlacement::findBestLoopTopHelper( if (!canMoveBottomBlockToTop(Pred, OldTop)) continue; - BlockFrequency Gains = FallThroughGains(Pred, OldTop, OtherBB, - LoopBlockSet); + BlockFrequency Gains = + FallThroughGains(Pred, OldTop, OtherBB, LoopBlockSet); if ((Gains > BlockFrequency(0)) && (Gains > BestGains || ((Gains == BestGains) && Pred->isLayoutSuccessor(OldTop)))) { @@ -2204,7 +2195,7 @@ MachineBlockPlacement::findBestLoopTop(const MachineLoop &L, OldTop = NewTop; NewTop = findBestLoopTopHelper(OldTop, L, LoopBlockSet); if (NewTop != OldTop) - ComputedEdges[NewTop] = { OldTop, false }; + ComputedEdges[NewTop] = {OldTop, false}; } return NewTop; } @@ -2336,10 +2327,8 @@ MachineBlockPlacement::findBestLoopExit(const MachineLoop &L, /// /// 1. Look for a Pred that can be layout before Top. /// 2. Check if Top is the most possible successor of Pred. -bool -MachineBlockPlacement::hasViableTopFallthrough( - const MachineBasicBlock *Top, - const BlockFilterSet &LoopBlockSet) { +bool MachineBlockPlacement::hasViableTopFallthrough( + const MachineBasicBlock *Top, const BlockFilterSet &LoopBlockSet) { for (MachineBasicBlock *Pred : Top->predecessors()) { BlockChain *PredChain = BlockToChain[Pred]; if (!LoopBlockSet.count(Pred) && @@ -2491,7 +2480,7 @@ void MachineBlockPlacement::rotateLoopWithProfile( if (!LoopBlockSet.count(Pred) && (!PredChain || Pred == *std::prev(PredChain->end()))) { auto EdgeFreq = MBFI->getBlockFreq(Pred) * - MBPI->getEdgeProbability(Pred, ChainHeaderBB); + MBPI->getEdgeProbability(Pred, ChainHeaderBB); auto FallThruCost = ScaleBlockFrequency(EdgeFreq, MisfetchCost); // If the predecessor has only an unconditional jump to the header, we // need to consider the cost of this jump. @@ -2951,12 +2940,16 @@ void MachineBlockPlacement::alignBlocks() { // exclusively on the loop info here so that we can align backedges in // unnatural CFGs and backedges that were introduced purely because of the // loop rotations done during this layout pass. - if (F->getFunction().hasMinSize() || - (F->getFunction().hasOptSize() && !TLI->alignLoopsWithOptSize())) - return; + if (!AlignAllBlock && !AlignAllNonFallThruBlocks) { + if (F->getFunction().hasMinSize() || + (F->getFunction().hasOptSize() && !TLI->alignLoopsWithOptSize())) + return; + } + BlockChain &FunctionChain = *BlockToChain[&F->front()]; + // Empty chain. if (FunctionChain.begin() == FunctionChain.end()) - return; // Empty chain. + return; const BranchProbability ColdProb(1, 5); // 20% BlockFrequency EntryFreq = MBFI->getBlockFreq(&F->front()); @@ -3052,6 +3045,33 @@ void MachineBlockPlacement::alignBlocks() { DetermineMaxAlignmentPadding(); } } + + const bool HasMaxBytesOverride = + MaxBytesForAlignmentOverride.getNumOccurrences() > 0; + + if (AlignAllBlock) + // Align all of the blocks in the function to a specific alignment. + for (MachineBasicBlock &MBB : *F) { + if (HasMaxBytesOverride) + MBB.setAlignment(Align(1ULL << AlignAllBlock), + MaxBytesForAlignmentOverride); + else + MBB.setAlignment(Align(1ULL << AlignAllBlock)); + } + else if (AlignAllNonFallThruBlocks) { + // Align all of the blocks that have no fall-through predecessors to a + // specific alignment. 
+ for (auto MBI = std::next(F->begin()), MBE = F->end(); MBI != MBE; ++MBI) { + auto LayoutPred = std::prev(MBI); + if (!LayoutPred->isSuccessor(&*MBI)) { + if (HasMaxBytesOverride) + MBI->setAlignment(Align(1ULL << AlignAllNonFallThruBlocks), + MaxBytesForAlignmentOverride); + else + MBI->setAlignment(Align(1ULL << AlignAllNonFallThruBlocks)); + } + } + } } /// Tail duplicate \p BB into (some) predecessors if profitable, repeating if @@ -3142,67 +3162,66 @@ bool MachineBlockPlacement::maybeTailDuplicateBlock( // This has to be a callback because none of it can be done after // BB is deleted. bool Removed = false; - auto RemovalCallback = - [&](MachineBasicBlock *RemBB) { - // Signal to outer function - Removed = true; - - // Conservative default. - bool InWorkList = true; - // Remove from the Chain and Chain Map - if (BlockToChain.count(RemBB)) { - BlockChain *Chain = BlockToChain[RemBB]; - InWorkList = Chain->UnscheduledPredecessors == 0; - Chain->remove(RemBB); - BlockToChain.erase(RemBB); - } - - // Handle the unplaced block iterator - if (&(*PrevUnplacedBlockIt) == RemBB) { - PrevUnplacedBlockIt++; - } - - // Handle the Work Lists - if (InWorkList) { - SmallVectorImpl &RemoveList = BlockWorkList; - if (RemBB->isEHPad()) - RemoveList = EHPadWorkList; - llvm::erase(RemoveList, RemBB); - } - - // Handle the filter set - if (BlockFilter) { - auto It = llvm::find(*BlockFilter, RemBB); - // Erase RemBB from BlockFilter, and keep PrevUnplacedBlockInFilterIt - // pointing to the same element as before. - if (It != BlockFilter->end()) { - if (It < PrevUnplacedBlockInFilterIt) { - const MachineBasicBlock *PrevBB = *PrevUnplacedBlockInFilterIt; - // BlockFilter is a SmallVector so all elements after RemBB are - // shifted to the front by 1 after its deletion. - auto Distance = PrevUnplacedBlockInFilterIt - It - 1; - PrevUnplacedBlockInFilterIt = BlockFilter->erase(It) + Distance; - assert(*PrevUnplacedBlockInFilterIt == PrevBB); - (void)PrevBB; - } else if (It == PrevUnplacedBlockInFilterIt) - // The block pointed by PrevUnplacedBlockInFilterIt is erased, we - // have to set it to the next element. - PrevUnplacedBlockInFilterIt = BlockFilter->erase(It); - else - BlockFilter->erase(It); - } - } + auto RemovalCallback = [&](MachineBasicBlock *RemBB) { + // Signal to outer function + Removed = true; + + // Conservative default. + bool InWorkList = true; + // Remove from the Chain and Chain Map + if (BlockToChain.count(RemBB)) { + BlockChain *Chain = BlockToChain[RemBB]; + InWorkList = Chain->UnscheduledPredecessors == 0; + Chain->remove(RemBB); + BlockToChain.erase(RemBB); + } + + // Handle the unplaced block iterator + if (&(*PrevUnplacedBlockIt) == RemBB) { + PrevUnplacedBlockIt++; + } + + // Handle the Work Lists + if (InWorkList) { + SmallVectorImpl &RemoveList = BlockWorkList; + if (RemBB->isEHPad()) + RemoveList = EHPadWorkList; + llvm::erase(RemoveList, RemBB); + } + + // Handle the filter set + if (BlockFilter) { + auto It = llvm::find(*BlockFilter, RemBB); + // Erase RemBB from BlockFilter, and keep PrevUnplacedBlockInFilterIt + // pointing to the same element as before. + if (It != BlockFilter->end()) { + if (It < PrevUnplacedBlockInFilterIt) { + const MachineBasicBlock *PrevBB = *PrevUnplacedBlockInFilterIt; + // BlockFilter is a SmallVector so all elements after RemBB are + // shifted to the front by 1 after its deletion. 
+ auto Distance = PrevUnplacedBlockInFilterIt - It - 1; + PrevUnplacedBlockInFilterIt = BlockFilter->erase(It) + Distance; + assert(*PrevUnplacedBlockInFilterIt == PrevBB); + (void)PrevBB; + } else if (It == PrevUnplacedBlockInFilterIt) + // The block pointed by PrevUnplacedBlockInFilterIt is erased, we + // have to set it to the next element. + PrevUnplacedBlockInFilterIt = BlockFilter->erase(It); + else + BlockFilter->erase(It); + } + } - // Remove the block from loop info. - MLI->removeBlock(RemBB); - if (RemBB == PreferredLoopExit) - PreferredLoopExit = nullptr; + // Remove the block from loop info. + MLI->removeBlock(RemBB); + if (RemBB == PreferredLoopExit) + PreferredLoopExit = nullptr; - LLVM_DEBUG(dbgs() << "TailDuplicator deleted block: " - << getBlockName(RemBB) << "\n"); - }; + LLVM_DEBUG(dbgs() << "TailDuplicator deleted block: " << getBlockName(RemBB) + << "\n"); + }; auto RemovalCallbackRef = - function_ref(RemovalCallback); + function_ref(RemovalCallback); SmallVector DuplicatedPreds; bool IsSimple = TailDup.isSimpleBB(BB); @@ -3223,11 +3242,11 @@ bool MachineBlockPlacement::maybeTailDuplicateBlock( DuplicatedToLPred = false; for (MachineBasicBlock *Pred : DuplicatedPreds) { // We're only looking for unscheduled predecessors that match the filter. - BlockChain* PredChain = BlockToChain[Pred]; + BlockChain *PredChain = BlockToChain[Pred]; if (Pred == LPred) DuplicatedToLPred = true; - if (Pred == LPred || (BlockFilter && !BlockFilter->count(Pred)) - || PredChain == &Chain) + if (Pred == LPred || (BlockFilter && !BlockFilter->count(Pred)) || + PredChain == &Chain) continue; for (MachineBasicBlock *NewSucc : Pred->successors()) { if (BlockFilter && !BlockFilter->count(NewSucc)) @@ -3297,8 +3316,7 @@ bool MachineBlockPlacement::isBestSuccessor(MachineBasicBlock *BB, // Find out the predecessors of BB and BB can be beneficially duplicated into // them. void MachineBlockPlacement::findDuplicateCandidates( - SmallVectorImpl &Candidates, - MachineBasicBlock *BB, + SmallVectorImpl &Candidates, MachineBasicBlock *BB, BlockFilterSet *BlockFilter) { MachineBasicBlock *Fallthrough = nullptr; BranchProbability DefaultBranchProb = BranchProbability::getZero(); @@ -3407,31 +3425,53 @@ void MachineBlockPlacement::findDuplicateCandidates( } } -void MachineBlockPlacement::initDupThreshold() { +void MachineBlockPlacement::initTailDupThreshold() { DupThreshold = BlockFrequency(0); - if (!F->getFunction().hasProfileData()) - return; + if (F->getFunction().hasProfileData()) { + // We prefer to use prifile count. + uint64_t HotThreshold = PSI->getOrCompHotCountThreshold(); + if (HotThreshold != UINT64_MAX) { + UseProfileCount = true; + DupThreshold = + BlockFrequency(HotThreshold * TailDupProfilePercentThreshold / 100); + } else { + // Profile count is not available, we can use block frequency instead. + BlockFrequency MaxFreq = BlockFrequency(0); + for (MachineBasicBlock &MBB : *F) { + BlockFrequency Freq = MBFI->getBlockFreq(&MBB); + if (Freq > MaxFreq) + MaxFreq = Freq; + } - // We prefer to use prifile count. - uint64_t HotThreshold = PSI->getOrCompHotCountThreshold(); - if (HotThreshold != UINT64_MAX) { - UseProfileCount = true; - DupThreshold = - BlockFrequency(HotThreshold * TailDupProfilePercentThreshold / 100); - return; + BranchProbability ThresholdProb(TailDupPlacementPenalty, 100); + DupThreshold = BlockFrequency(MaxFreq * ThresholdProb); + UseProfileCount = false; + } } - // Profile count is not available, we can use block frequency instead. 
- BlockFrequency MaxFreq = BlockFrequency(0); - for (MachineBasicBlock &MBB : *F) { - BlockFrequency Freq = MBFI->getBlockFreq(&MBB); - if (Freq > MaxFreq) - MaxFreq = Freq; + TailDupSize = TailDupPlacementThreshold; + // If only the aggressive threshold is explicitly set, use it. + if (TailDupPlacementAggressiveThreshold.getNumOccurrences() != 0 && + TailDupPlacementThreshold.getNumOccurrences() == 0) + TailDupSize = TailDupPlacementAggressiveThreshold; + + // For aggressive optimization, we can adjust some thresholds to be less + // conservative. + if (PassConfig->getOptLevel() >= CodeGenOptLevel::Aggressive) { + // At O3 we should be more willing to copy blocks for tail duplication. This + // increases size pressure, so we only do it at O3 + // Do this unless only the regular threshold is explicitly set. + if (TailDupPlacementThreshold.getNumOccurrences() == 0 || + TailDupPlacementAggressiveThreshold.getNumOccurrences() != 0) + TailDupSize = TailDupPlacementAggressiveThreshold; } - BranchProbability ThresholdProb(TailDupPlacementPenalty, 100); - DupThreshold = BlockFrequency(MaxFreq * ThresholdProb); - UseProfileCount = false; + // If there's no threshold provided through options, query the target + // information for a threshold instead. + if (TailDupPlacementThreshold.getNumOccurrences() == 0 && + (PassConfig->getOptLevel() < CodeGenOptLevel::Aggressive || + TailDupPlacementAggressiveThreshold.getNumOccurrences() == 0)) + TailDupSize = TII->getTailDuplicateSize(PassConfig->getOptLevel()); } bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) { @@ -3451,8 +3491,7 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) { TLI = MF.getSubtarget().getTargetLowering(); MPDT = nullptr; PSI = &getAnalysis().getPSI(); - - initDupThreshold(); + PassConfig = &getAnalysis(); // Initialize PreferredLoopExit to nullptr here since it may never be set if // there are no MachineLoops. @@ -3463,38 +3502,17 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) { assert(ComputedEdges.empty() && "Computed Edge map should be empty before starting placement."); - unsigned TailDupSize = TailDupPlacementThreshold; - // If only the aggressive threshold is explicitly set, use it. - if (TailDupPlacementAggressiveThreshold.getNumOccurrences() != 0 && - TailDupPlacementThreshold.getNumOccurrences() == 0) - TailDupSize = TailDupPlacementAggressiveThreshold; - - TargetPassConfig *PassConfig = &getAnalysis(); - // For aggressive optimization, we can adjust some thresholds to be less - // conservative. - if (PassConfig->getOptLevel() >= CodeGenOptLevel::Aggressive) { - // At O3 we should be more willing to copy blocks for tail duplication. This - // increases size pressure, so we only do it at O3 - // Do this unless only the regular threshold is explicitly set. - if (TailDupPlacementThreshold.getNumOccurrences() == 0 || - TailDupPlacementAggressiveThreshold.getNumOccurrences() != 0) - TailDupSize = TailDupPlacementAggressiveThreshold; - } - - // If there's no threshold provided through options, query the target - // information for a threshold instead. - if (TailDupPlacementThreshold.getNumOccurrences() == 0 && - (PassConfig->getOptLevel() < CodeGenOptLevel::Aggressive || - TailDupPlacementAggressiveThreshold.getNumOccurrences() == 0)) - TailDupSize = TII->getTailDuplicateSize(PassConfig->getOptLevel()); + // Initialize tail duplication thresholds. + initTailDupThreshold(); + // Apply tail duplication. 
   if (allowTailDupPlacement()) {
     MPDT = &getAnalysis<MachinePostDominatorTreeWrapperPass>().getPostDomTree();
     bool OptForSize = MF.getFunction().hasOptSize() ||
                       llvm::shouldOptimizeForSize(&MF, PSI, &MBFI->getMBFI());
     if (OptForSize)
       TailDupSize = 1;
-    bool PreRegAlloc = false;
+    const bool PreRegAlloc = false;
     TailDup.initMF(MF, PreRegAlloc, MBPI, MBFI.get(), PSI,
                    /* LayoutMode */ true, TailDupSize);
     precomputeTriangleChains();
@@ -3505,12 +3523,12 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) {
   // Changing the layout can create new tail merging opportunities.
   // TailMerge can create jump into if branches that make CFG irreducible for
   // HW that requires structured CFG.
-  bool EnableTailMerge = !MF.getTarget().requiresStructuredCFG() &&
-                         PassConfig->getEnableTailMerge() &&
-                         BranchFoldPlacement;
+  const bool EnableTailMerge = !MF.getTarget().requiresStructuredCFG() &&
+                               PassConfig->getEnableTailMerge() &&
+                               BranchFoldPlacement && MF.size() > 3;
   // No tail merging opportunities if the block number is less than four.
-  if (MF.size() > 3 && EnableTailMerge) {
-    unsigned TailMergeSize = TailDupSize + 1;
+  if (EnableTailMerge) {
+    const unsigned TailMergeSize = TailDupSize + 1;
     BranchFolder BF(/*DefaultEnableTailMerge=*/true, /*CommonHoist=*/false,
                     *MBFI, *MBPI, PSI, TailMergeSize);

@@ -3545,32 +3563,7 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) {
   ComputedEdges.clear();
   ChainAllocator.DestroyAll();

-  bool HasMaxBytesOverride =
-      MaxBytesForAlignmentOverride.getNumOccurrences() > 0;
-
-  if (AlignAllBlock)
-    // Align all of the blocks in the function to a specific alignment.
-    for (MachineBasicBlock &MBB : MF) {
-      if (HasMaxBytesOverride)
-        MBB.setAlignment(Align(1ULL << AlignAllBlock),
-                         MaxBytesForAlignmentOverride);
-      else
-        MBB.setAlignment(Align(1ULL << AlignAllBlock));
-    }
-  else if (AlignAllNonFallThruBlocks) {
-    // Align all of the blocks that have no fall-through predecessors to a
-    // specific alignment.
-    for (auto MBI = std::next(MF.begin()), MBE = MF.end(); MBI != MBE; ++MBI) {
-      auto LayoutPred = std::prev(MBI);
-      if (!LayoutPred->isSuccessor(&*MBI)) {
-        if (HasMaxBytesOverride)
-          MBI->setAlignment(Align(1ULL << AlignAllNonFallThruBlocks),
-                            MaxBytesForAlignmentOverride);
-        else
-          MBI->setAlignment(Align(1ULL << AlignAllNonFallThruBlocks));
-      }
-    }
-  }
+  // View the function.
   if (ViewBlockLayoutWithBFI != GVDT_None &&
       (ViewBlockFreqFuncName.empty() ||
        F->getFunction().getName() == ViewBlockFreqFuncName)) {

From f12d72d9c95ea2cd82d3e2a70c8cd5d5e88e2060 Mon Sep 17 00:00:00 2001
From: David Green
Date: Tue, 24 Sep 2024 16:41:53 +0100
Subject: [PATCH 011/212] [PtrInfo] Use plain pointer over 'PointerIntPair'. NFC (#109772)

This PtrInfo holds a pointer and whether it has been set. We can represent
this sentinel value as a nullptr, as we would for most pointers. This
assumes that the value is never set to nullptr, but from the uses that
appears to be true and already assumed.
---
 llvm/include/llvm/Analysis/PtrUseVisitor.h | 33 ++++++++++------------
 1 file changed, 15 insertions(+), 18 deletions(-)

diff --git a/llvm/include/llvm/Analysis/PtrUseVisitor.h b/llvm/include/llvm/Analysis/PtrUseVisitor.h
index 237d328721609..bbe2741f44fc3 100644
--- a/llvm/include/llvm/Analysis/PtrUseVisitor.h
+++ b/llvm/include/llvm/Analysis/PtrUseVisitor.h
@@ -52,57 +52,54 @@ class PtrUseVisitorBase {
   /// analysis and whether the visit completed or aborted early.
   class PtrInfo {
   public:
-    PtrInfo() : AbortedInfo(nullptr, false), EscapedInfo(nullptr, false) {}
-
     /// Reset the pointer info, clearing all state.
     void reset() {
-      AbortedInfo.setPointer(nullptr);
-      AbortedInfo.setInt(false);
-      EscapedInfo.setPointer(nullptr);
-      EscapedInfo.setInt(false);
+      AbortedInfo = nullptr;
+      EscapedInfo = nullptr;
     }

     /// Did we abort the visit early?
-    bool isAborted() const { return AbortedInfo.getInt(); }
+    bool isAborted() const { return AbortedInfo != nullptr; }

     /// Is the pointer escaped at some point?
-    bool isEscaped() const { return EscapedInfo.getInt(); }
+    bool isEscaped() const { return EscapedInfo != nullptr; }

     /// Get the instruction causing the visit to abort.
     /// \returns a pointer to the instruction causing the abort if one is
     /// available; otherwise returns null.
-    Instruction *getAbortingInst() const { return AbortedInfo.getPointer(); }
+    Instruction *getAbortingInst() const { return AbortedInfo; }

     /// Get the instruction causing the pointer to escape.
     /// \returns a pointer to the instruction which escapes the pointer if one
     /// is available; otherwise returns null.
-    Instruction *getEscapingInst() const { return EscapedInfo.getPointer(); }
+    Instruction *getEscapingInst() const { return EscapedInfo; }

     /// Mark the visit as aborted. Intended for use in a void return.
     /// \param I The instruction which caused the visit to abort, if available.
-    void setAborted(Instruction *I = nullptr) {
-      AbortedInfo.setInt(true);
-      AbortedInfo.setPointer(I);
+    void setAborted(Instruction *I) {
+      assert(I && "Expected a valid pointer in setAborted");
+      AbortedInfo = I;
     }

     /// Mark the pointer as escaped. Intended for use in a void return.
     /// \param I The instruction which escapes the pointer, if available.
-    void setEscaped(Instruction *I = nullptr) {
-      EscapedInfo.setInt(true);
-      EscapedInfo.setPointer(I);
+    void setEscaped(Instruction *I) {
+      assert(I && "Expected a valid pointer in setEscaped");
+      EscapedInfo = I;
     }

     /// Mark the pointer as escaped, and the visit as aborted. Intended
     /// for use in a void return.
     /// \param I The instruction which both escapes the pointer and aborts the
     /// visit, if available.
- void setEscapedAndAborted(Instruction *I = nullptr) { + void setEscapedAndAborted(Instruction *I) { setEscaped(I); setAborted(I); } private: - PointerIntPair AbortedInfo, EscapedInfo; + Instruction *AbortedInfo = nullptr; + Instruction *EscapedInfo = nullptr; }; protected: From d075debc508898d5f365f8e909c54d6f4edada85 Mon Sep 17 00:00:00 2001 From: Jay Foad Date: Tue, 24 Sep 2024 16:50:46 +0100 Subject: [PATCH 012/212] [AMDGPU] Fix chain handling when lowering barrier intrinsics (#109799) Previously we would fail an assertion in RemoveNodeFromCSEMaps after lowering: t3: ch = llvm.amdgcn.s.barrier.join t0, TargetConstant:i64<2973>, Constant:i32<0> to: t6: ch = S_BARRIER_JOIN_IMM TargetConstant:i32<0> --- llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 5 +- .../AMDGPU/llvm.amdgcn.s.barrier.wait.ll | 221 +++++++++--------- 2 files changed, 117 insertions(+), 109 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index a9754ba357893..08f2ff4566b67 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -9365,6 +9365,7 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op, Opc = AMDGPU::S_GET_BARRIER_STATE_IMM; SDValue K = DAG.getTargetConstant(BarID, DL, MVT::i32); Ops.push_back(K); + Ops.push_back(Chain); } else { Opc = AMDGPU::S_GET_BARRIER_STATE_M0; SDValue M0Val = copyToM0(DAG, Chain, DL, Op.getOperand(2)); @@ -9967,7 +9968,9 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op, 0); } Ops.push_back(copyToM0(DAG, Chain, DL, M0Val).getValue(0)); - } else if (!IsInlinableBarID) { + } else if (IsInlinableBarID) { + Ops.push_back(Chain); + } else { Ops.push_back(copyToM0(DAG, Chain, DL, BarOp).getValue(0)); } diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.barrier.wait.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.barrier.wait.ll index a4be9ed8c2b4a..4fb28b392c9ea 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.barrier.wait.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.barrier.wait.ll @@ -768,17 +768,19 @@ define void @test5_s_barrier_init_m0(i32 %arg1 ,i32 %arg2) { } define amdgpu_kernel void @test1_s_barrier_join(ptr addrspace(1) %out) #0 { +; ; GFX12-SDAG-LABEL: test1_s_barrier_join: ; GFX12-SDAG: ; %bb.0: ; %entry ; GFX12-SDAG-NEXT: s_load_b64 s[0:1], s[2:3], 0x24 ; GFX12-SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v0 -; GFX12-SDAG-NEXT: s_barrier_join -1 -; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) +; GFX12-SDAG-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_lshlrev_b32 v3, 2, v0 ; GFX12-SDAG-NEXT: v_mul_u32_u24_e32 v1, v0, v0 -; GFX12-SDAG-NEXT: v_lshlrev_b32_e32 v2, 2, v0 ; GFX12-SDAG-NEXT: v_sub_nc_u32_e32 v0, v1, v0 ; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0 -; GFX12-SDAG-NEXT: global_store_b32 v2, v0, s[0:1] +; GFX12-SDAG-NEXT: global_store_b32 v3, v2, s[0:1] +; GFX12-SDAG-NEXT: s_barrier_join -1 +; GFX12-SDAG-NEXT: global_store_b32 v3, v0, s[0:1] ; GFX12-SDAG-NEXT: s_nop 0 ; GFX12-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX12-SDAG-NEXT: s_endpgm @@ -810,17 +812,19 @@ entry: } define amdgpu_kernel void @test2_s_barrier_join(ptr addrspace(1) %out) #0 { +; ; GFX12-SDAG-LABEL: test2_s_barrier_join: ; GFX12-SDAG: ; %bb.0: ; %entry ; GFX12-SDAG-NEXT: s_load_b64 s[0:1], s[2:3], 0x24 ; GFX12-SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v0 -; GFX12-SDAG-NEXT: s_barrier_join 1 -; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | 
instid1(VALU_DEP_2) +; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) +; GFX12-SDAG-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_lshlrev_b32 v3, 2, v0 ; GFX12-SDAG-NEXT: v_mul_u32_u24_e32 v1, v0, v0 -; GFX12-SDAG-NEXT: v_lshlrev_b32_e32 v2, 2, v0 ; GFX12-SDAG-NEXT: v_sub_nc_u32_e32 v0, v1, v0 ; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0 -; GFX12-SDAG-NEXT: global_store_b32 v2, v0, s[0:1] +; GFX12-SDAG-NEXT: global_store_b32 v3, v2, s[0:1] +; GFX12-SDAG-NEXT: s_barrier_join 1 +; GFX12-SDAG-NEXT: global_store_b32 v3, v0, s[0:1] ; GFX12-SDAG-NEXT: s_nop 0 ; GFX12-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX12-SDAG-NEXT: s_endpgm @@ -852,17 +856,19 @@ entry: } define amdgpu_kernel void @test3_s_barrier_join(ptr addrspace(1) %out) #0 { +; ; GFX12-SDAG-LABEL: test3_s_barrier_join: ; GFX12-SDAG: ; %bb.0: ; %entry ; GFX12-SDAG-NEXT: s_load_b64 s[0:1], s[2:3], 0x24 ; GFX12-SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v0 -; GFX12-SDAG-NEXT: s_barrier_join 0 -; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) +; GFX12-SDAG-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_lshlrev_b32 v3, 2, v0 ; GFX12-SDAG-NEXT: v_mul_u32_u24_e32 v1, v0, v0 -; GFX12-SDAG-NEXT: v_lshlrev_b32_e32 v2, 2, v0 ; GFX12-SDAG-NEXT: v_sub_nc_u32_e32 v0, v1, v0 ; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0 -; GFX12-SDAG-NEXT: global_store_b32 v2, v0, s[0:1] +; GFX12-SDAG-NEXT: global_store_b32 v3, v2, s[0:1] +; GFX12-SDAG-NEXT: s_barrier_join 0 +; GFX12-SDAG-NEXT: global_store_b32 v3, v0, s[0:1] ; GFX12-SDAG-NEXT: s_nop 0 ; GFX12-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX12-SDAG-NEXT: s_endpgm @@ -967,6 +973,20 @@ define void @test5_s_barrier_join_m0(i32 %arg) { ret void } +define void @test6_s_barrier_join_0() { +; GFX12-LABEL: test6_s_barrier_join_0: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: s_barrier_join 0 +; GFX12-NEXT: s_setpc_b64 s[30:31] + call void @llvm.amdgcn.s.barrier.join(i32 0) + ret void +} + define amdgpu_kernel void @test1_s_barrier_leave(ptr addrspace(1) %a, ptr addrspace(1) %b, ptr addrspace(1) %c, ptr addrspace(1) %out) #0 { ; GFX12-SDAG-LABEL: test1_s_barrier_leave: ; GFX12-SDAG: ; %bb.0: ; %entry @@ -1026,17 +1046,19 @@ entry: } define amdgpu_kernel void @test1_s_wakeup_barrier(ptr addrspace(1) %out) #0 { +; ; GFX12-SDAG-LABEL: test1_s_wakeup_barrier: ; GFX12-SDAG: ; %bb.0: ; %entry ; GFX12-SDAG-NEXT: s_load_b64 s[0:1], s[2:3], 0x24 ; GFX12-SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v0 -; GFX12-SDAG-NEXT: s_wakeup_barrier -1 -; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) +; GFX12-SDAG-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_lshlrev_b32 v3, 2, v0 ; GFX12-SDAG-NEXT: v_mul_u32_u24_e32 v1, v0, v0 -; GFX12-SDAG-NEXT: v_lshlrev_b32_e32 v2, 2, v0 ; GFX12-SDAG-NEXT: v_sub_nc_u32_e32 v0, v1, v0 ; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0 -; GFX12-SDAG-NEXT: global_store_b32 v2, v0, s[0:1] +; GFX12-SDAG-NEXT: global_store_b32 v3, v2, s[0:1] +; GFX12-SDAG-NEXT: s_wakeup_barrier -1 +; GFX12-SDAG-NEXT: global_store_b32 v3, v0, s[0:1] ; GFX12-SDAG-NEXT: s_nop 0 ; GFX12-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX12-SDAG-NEXT: s_endpgm @@ -1068,17 +1090,19 @@ entry: } define 
amdgpu_kernel void @test2_s_wakeup_barrier(ptr addrspace(1) %out) #0 { +; ; GFX12-SDAG-LABEL: test2_s_wakeup_barrier: ; GFX12-SDAG: ; %bb.0: ; %entry ; GFX12-SDAG-NEXT: s_load_b64 s[0:1], s[2:3], 0x24 ; GFX12-SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v0 -; GFX12-SDAG-NEXT: s_wakeup_barrier 1 -; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) +; GFX12-SDAG-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_lshlrev_b32 v3, 2, v0 ; GFX12-SDAG-NEXT: v_mul_u32_u24_e32 v1, v0, v0 -; GFX12-SDAG-NEXT: v_lshlrev_b32_e32 v2, 2, v0 ; GFX12-SDAG-NEXT: v_sub_nc_u32_e32 v0, v1, v0 ; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0 -; GFX12-SDAG-NEXT: global_store_b32 v2, v0, s[0:1] +; GFX12-SDAG-NEXT: global_store_b32 v3, v2, s[0:1] +; GFX12-SDAG-NEXT: s_wakeup_barrier 1 +; GFX12-SDAG-NEXT: global_store_b32 v3, v0, s[0:1] ; GFX12-SDAG-NEXT: s_nop 0 ; GFX12-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX12-SDAG-NEXT: s_endpgm @@ -1110,17 +1134,19 @@ entry: } define amdgpu_kernel void @test3_s_wakeup_barrier(ptr addrspace(1) %out) #0 { +; ; GFX12-SDAG-LABEL: test3_s_wakeup_barrier: ; GFX12-SDAG: ; %bb.0: ; %entry ; GFX12-SDAG-NEXT: s_load_b64 s[0:1], s[2:3], 0x24 ; GFX12-SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v0 -; GFX12-SDAG-NEXT: s_wakeup_barrier 0 -; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) +; GFX12-SDAG-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_lshlrev_b32 v3, 2, v0 ; GFX12-SDAG-NEXT: v_mul_u32_u24_e32 v1, v0, v0 -; GFX12-SDAG-NEXT: v_lshlrev_b32_e32 v2, 2, v0 ; GFX12-SDAG-NEXT: v_sub_nc_u32_e32 v0, v1, v0 ; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0 -; GFX12-SDAG-NEXT: global_store_b32 v2, v0, s[0:1] +; GFX12-SDAG-NEXT: global_store_b32 v3, v2, s[0:1] +; GFX12-SDAG-NEXT: s_wakeup_barrier 0 +; GFX12-SDAG-NEXT: global_store_b32 v3, v0, s[0:1] ; GFX12-SDAG-NEXT: s_nop 0 ; GFX12-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX12-SDAG-NEXT: s_endpgm @@ -1226,34 +1252,21 @@ define void @test5_s_wakeup_barrier_m0(i32 %arg) { } define amdgpu_kernel void @test1_s_get_barrier_state(ptr addrspace(1) %out) #0 { -; GFX12-SDAG-LABEL: test1_s_get_barrier_state: -; GFX12-SDAG: ; %bb.0: ; %entry -; GFX12-SDAG-NEXT: s_get_barrier_state s4, -1 -; GFX12-SDAG-NEXT: s_load_b64 s[0:1], s[2:3], 0x24 -; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0 -; GFX12-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX12-SDAG-NEXT: v_dual_mov_b32 v1, s4 :: v_dual_and_b32 v0, 0x3ff, v0 -; GFX12-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0 -; GFX12-SDAG-NEXT: global_store_b32 v0, v1, s[0:1] -; GFX12-SDAG-NEXT: s_nop 0 -; GFX12-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) -; GFX12-SDAG-NEXT: s_endpgm -; -; GFX12-GISEL-LABEL: test1_s_get_barrier_state: -; GFX12-GISEL: ; %bb.0: ; %entry -; GFX12-GISEL-NEXT: s_load_b64 s[0:1], s[2:3], 0x24 -; GFX12-GISEL-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_and_b32 v0, 0x3ff, v0 -; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_4) | instid1(SALU_CYCLE_2) -; GFX12-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 -; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0 -; GFX12-GISEL-NEXT: global_store_b32 v0, v1, s[0:1] -; GFX12-GISEL-NEXT: s_get_barrier_state s2, -1 -; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0 -; GFX12-GISEL-NEXT: v_mov_b32_e32 v1, s2 -; GFX12-GISEL-NEXT: global_store_b32 v0, v1, s[0:1] -; GFX12-GISEL-NEXT: s_nop 0 -; GFX12-GISEL-NEXT: s_sendmsg 
sendmsg(MSG_DEALLOC_VGPRS) -; GFX12-GISEL-NEXT: s_endpgm +; GFX12-LABEL: test1_s_get_barrier_state: +; GFX12: ; %bb.0: ; %entry +; GFX12-NEXT: s_load_b64 s[0:1], s[2:3], 0x24 +; GFX12-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_and_b32 v0, 0x3ff, v0 +; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_4) | instid1(SALU_CYCLE_2) +; GFX12-NEXT: v_lshlrev_b32_e32 v0, 2, v0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: global_store_b32 v0, v1, s[0:1] +; GFX12-NEXT: s_get_barrier_state s2, -1 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: v_mov_b32_e32 v1, s2 +; GFX12-NEXT: global_store_b32 v0, v1, s[0:1] +; GFX12-NEXT: s_nop 0 +; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) +; GFX12-NEXT: s_endpgm entry: %tmp = call i32 @llvm.amdgcn.workitem.id.x() %tmp1 = getelementptr i32, ptr addrspace(1) %out, i32 %tmp @@ -1264,34 +1277,21 @@ entry: } define amdgpu_kernel void @test2_s_get_barrier_state(ptr addrspace(1) %out) #0 { -; GFX12-SDAG-LABEL: test2_s_get_barrier_state: -; GFX12-SDAG: ; %bb.0: ; %entry -; GFX12-SDAG-NEXT: s_get_barrier_state s4, 1 -; GFX12-SDAG-NEXT: s_load_b64 s[0:1], s[2:3], 0x24 -; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0 -; GFX12-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX12-SDAG-NEXT: v_dual_mov_b32 v1, s4 :: v_dual_and_b32 v0, 0x3ff, v0 -; GFX12-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0 -; GFX12-SDAG-NEXT: global_store_b32 v0, v1, s[0:1] -; GFX12-SDAG-NEXT: s_nop 0 -; GFX12-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) -; GFX12-SDAG-NEXT: s_endpgm -; -; GFX12-GISEL-LABEL: test2_s_get_barrier_state: -; GFX12-GISEL: ; %bb.0: ; %entry -; GFX12-GISEL-NEXT: s_load_b64 s[0:1], s[2:3], 0x24 -; GFX12-GISEL-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_and_b32 v0, 0x3ff, v0 -; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_4) | instid1(SALU_CYCLE_2) -; GFX12-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 -; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0 -; GFX12-GISEL-NEXT: global_store_b32 v0, v1, s[0:1] -; GFX12-GISEL-NEXT: s_get_barrier_state s2, 1 -; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0 -; GFX12-GISEL-NEXT: v_mov_b32_e32 v1, s2 -; GFX12-GISEL-NEXT: global_store_b32 v0, v1, s[0:1] -; GFX12-GISEL-NEXT: s_nop 0 -; GFX12-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) -; GFX12-GISEL-NEXT: s_endpgm +; GFX12-LABEL: test2_s_get_barrier_state: +; GFX12: ; %bb.0: ; %entry +; GFX12-NEXT: s_load_b64 s[0:1], s[2:3], 0x24 +; GFX12-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_and_b32 v0, 0x3ff, v0 +; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_4) | instid1(SALU_CYCLE_2) +; GFX12-NEXT: v_lshlrev_b32_e32 v0, 2, v0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: global_store_b32 v0, v1, s[0:1] +; GFX12-NEXT: s_get_barrier_state s2, 1 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: v_mov_b32_e32 v1, s2 +; GFX12-NEXT: global_store_b32 v0, v1, s[0:1] +; GFX12-NEXT: s_nop 0 +; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) +; GFX12-NEXT: s_endpgm entry: %tmp = call i32 @llvm.amdgcn.workitem.id.x() %tmp1 = getelementptr i32, ptr addrspace(1) %out, i32 %tmp @@ -1302,34 +1302,21 @@ entry: } define amdgpu_kernel void @test3_s_get_barrier_state(ptr addrspace(1) %out) #0 { -; GFX12-SDAG-LABEL: test3_s_get_barrier_state: -; GFX12-SDAG: ; %bb.0: ; %entry -; GFX12-SDAG-NEXT: s_get_barrier_state s4, 0 -; GFX12-SDAG-NEXT: s_load_b64 s[0:1], s[2:3], 0x24 -; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0 -; GFX12-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX12-SDAG-NEXT: v_dual_mov_b32 v1, s4 :: v_dual_and_b32 v0, 0x3ff, v0 -; 
GFX12-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0 -; GFX12-SDAG-NEXT: global_store_b32 v0, v1, s[0:1] -; GFX12-SDAG-NEXT: s_nop 0 -; GFX12-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) -; GFX12-SDAG-NEXT: s_endpgm -; -; GFX12-GISEL-LABEL: test3_s_get_barrier_state: -; GFX12-GISEL: ; %bb.0: ; %entry -; GFX12-GISEL-NEXT: s_load_b64 s[0:1], s[2:3], 0x24 -; GFX12-GISEL-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_and_b32 v0, 0x3ff, v0 -; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_4) | instid1(SALU_CYCLE_2) -; GFX12-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 -; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0 -; GFX12-GISEL-NEXT: global_store_b32 v0, v1, s[0:1] -; GFX12-GISEL-NEXT: s_get_barrier_state s2, 0 -; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0 -; GFX12-GISEL-NEXT: v_mov_b32_e32 v1, s2 -; GFX12-GISEL-NEXT: global_store_b32 v0, v1, s[0:1] -; GFX12-GISEL-NEXT: s_nop 0 -; GFX12-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) -; GFX12-GISEL-NEXT: s_endpgm +; GFX12-LABEL: test3_s_get_barrier_state: +; GFX12: ; %bb.0: ; %entry +; GFX12-NEXT: s_load_b64 s[0:1], s[2:3], 0x24 +; GFX12-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_and_b32 v0, 0x3ff, v0 +; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_4) | instid1(SALU_CYCLE_2) +; GFX12-NEXT: v_lshlrev_b32_e32 v0, 2, v0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: global_store_b32 v0, v1, s[0:1] +; GFX12-NEXT: s_get_barrier_state s2, 0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: v_mov_b32_e32 v1, s2 +; GFX12-NEXT: global_store_b32 v0, v1, s[0:1] +; GFX12-NEXT: s_nop 0 +; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) +; GFX12-NEXT: s_endpgm entry: %tmp = call i32 @llvm.amdgcn.workitem.id.x() %tmp1 = getelementptr i32, ptr addrspace(1) %out, i32 %tmp @@ -1401,6 +1388,24 @@ define i32 @test5_s_get_barrier_state_m0(i32 %arg) { ret i32 %state } +define i32 @test6_s_get_barrier_state_0() { +; GFX12-LABEL: test6_s_get_barrier_state_0: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: s_get_barrier_state s0, 0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: s_wait_alu 0xfffe +; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX12-NEXT: v_mov_b32_e32 v0, s0 +; GFX12-NEXT: s_setpc_b64 s[30:31] + %state = call i32 @llvm.amdgcn.s.get.barrier.state(i32 0) + ret i32 %state +} + define amdgpu_kernel void @test_barrier_convert(ptr addrspace(1) %out) #0 { ; GFX12-SDAG-LABEL: test_barrier_convert: ; GFX12-SDAG: ; %bb.0: ; %entry From d4a38c8ff5c993e14c42895b51a47272fb03a857 Mon Sep 17 00:00:00 2001 From: Usman Nadeem Date: Tue, 24 Sep 2024 08:54:36 -0700 Subject: [PATCH 013/212] =?UTF-8?q?[DFAJumpThreading]=20Handle=20select=20?= =?UTF-8?q?unfolding=20when=20user=20phi=20is=20not=20a=20dir=E2=80=A6=20(?= =?UTF-8?q?#109511)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit …ect successor Previously the code assumed that the select instruction is defined in a block that is a direct predecessor of the block where the PHINode uses it. So, we were hitting an assertion when we tried to access the def block as an incoming block for the user phi node. This patch handles that case by using the correct end block and creating a new phi node that aggregates both the values of the select in that end block, and then using that new unfolded phi to overwrite the original user phi node. 
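Schematically, the new handling in `unfold` looks like this (abridged from
the change below; the construction of NewBlock/NewPhi for the 'false' arm
and the updates of the other phis in EndBlock are unchanged and omitted
here):

  BasicBlock *EndBlock = StartBlock->getUniqueSuccessor();
  if (EndBlock == SIUse->getParent()) {
    // Old, direct-predecessor case: patch the user phi in place.
    SIUse->addIncoming(NewPhi, NewBlock);
    SIUse->replaceUsesOfWith(SI, SIOp1);
  } else {
    // New case: the user phi lives further down the CFG. Aggregate both
    // select values in a fresh phi in EndBlock, then route the user phi
    // through that aggregate instead of the (soon dead) select.
    PHINode *EndPhi = PHINode::Create(SIUse->getType(), pred_size(EndBlock),
                                      Twine(SI->getName(), ".si.unfold.phi"),
                                      EndBlock->getFirstInsertionPt());
    EndPhi->addIncoming(SIOp1, StartBlock);
    EndPhi->addIncoming(NewPhi, NewBlock);
    SIUse->replaceUsesOfWith(SI, EndPhi);
  }

(The actual patch also adds self-referencing incomings for EndBlock
predecessors other than StartBlock/NewBlock, and retargets the
nested-select worklist at the new phi.)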
Fixes #106083 Change-Id: Ie471994cca232318f74a6e6438efa21e561c2dc0 --- .../Transforms/Scalar/DFAJumpThreading.cpp | 46 ++++--- .../dfa-jump-threading-transform.ll | 123 ++++++++++++++++++ 2 files changed, 152 insertions(+), 17 deletions(-) diff --git a/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp b/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp index ef9c264482a64..0e2b5c925a6a7 100644 --- a/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp +++ b/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp @@ -194,7 +194,6 @@ void unfold(DomTreeUpdater *DTU, LoopInfo *LI, SelectInstToUnfold SIToUnfold, SelectInst *SI = SIToUnfold.getInst(); PHINode *SIUse = SIToUnfold.getUse(); BasicBlock *StartBlock = SI->getParent(); - BasicBlock *EndBlock = SIUse->getParent(); BranchInst *StartBlockTerm = dyn_cast(StartBlock->getTerminator()); @@ -202,6 +201,7 @@ void unfold(DomTreeUpdater *DTU, LoopInfo *LI, SelectInstToUnfold SIToUnfold, assert(SI->hasOneUse()); if (StartBlockTerm->isUnconditional()) { + BasicBlock *EndBlock = StartBlock->getUniqueSuccessor(); // Arbitrarily choose the 'false' side for a new input value to the PHI. BasicBlock *NewBlock = BasicBlock::Create( SI->getContext(), Twine(SI->getName(), ".si.unfold.false"), @@ -223,32 +223,44 @@ void unfold(DomTreeUpdater *DTU, LoopInfo *LI, SelectInstToUnfold SIToUnfold, NewBlock->getFirstInsertionPt()); NewPhi->addIncoming(SIOp2, StartBlock); - if (auto *OpSi = dyn_cast(SIOp1)) - NewSIsToUnfold->push_back(SelectInstToUnfold(OpSi, SIUse)); - if (auto *OpSi = dyn_cast(SIOp2)) - NewSIsToUnfold->push_back(SelectInstToUnfold(OpSi, NewPhi)); - - // Update the phi node of SI. - for (unsigned Idx = 0; Idx < SIUse->getNumIncomingValues(); ++Idx) { - if (SIUse->getIncomingBlock(Idx) == StartBlock) - SIUse->setIncomingValue(Idx, SIOp1); + // Update any other PHI nodes in EndBlock. + for (PHINode &Phi : EndBlock->phis()) { + if (SIUse == &Phi) + continue; + Phi.addIncoming(Phi.getIncomingValueForBlock(StartBlock), NewBlock); } - SIUse->addIncoming(NewPhi, NewBlock); - // Update any other PHI nodes in EndBlock. - for (auto II = EndBlock->begin(); PHINode *Phi = dyn_cast(II); - ++II) { - if (Phi != SIUse) - Phi->addIncoming(Phi->getIncomingValueForBlock(StartBlock), NewBlock); + // Update the phi node of SI, which is its only use. + if (EndBlock == SIUse->getParent()) { + SIUse->addIncoming(NewPhi, NewBlock); + SIUse->replaceUsesOfWith(SI, SIOp1); + } else { + PHINode *EndPhi = PHINode::Create(SIUse->getType(), pred_size(EndBlock), + Twine(SI->getName(), ".si.unfold.phi"), + EndBlock->getFirstInsertionPt()); + for (BasicBlock *Pred : predecessors(EndBlock)) { + if (Pred != StartBlock && Pred != NewBlock) + EndPhi->addIncoming(EndPhi, Pred); + } + + EndPhi->addIncoming(SIOp1, StartBlock); + EndPhi->addIncoming(NewPhi, NewBlock); + SIUse->replaceUsesOfWith(SI, EndPhi); + SIUse = EndPhi; } - StartBlockTerm->eraseFromParent(); + if (auto *OpSi = dyn_cast(SIOp1)) + NewSIsToUnfold->push_back(SelectInstToUnfold(OpSi, SIUse)); + if (auto *OpSi = dyn_cast(SIOp2)) + NewSIsToUnfold->push_back(SelectInstToUnfold(OpSi, NewPhi)); // Insert the real conditional branch based on the original condition. 
+ StartBlockTerm->eraseFromParent(); BranchInst::Create(EndBlock, NewBlock, SI->getCondition(), StartBlock); DTU->applyUpdates({{DominatorTree::Insert, StartBlock, EndBlock}, {DominatorTree::Insert, StartBlock, NewBlock}}); } else { + BasicBlock *EndBlock = SIUse->getParent(); BasicBlock *NewBlockT = BasicBlock::Create( SI->getContext(), Twine(SI->getName(), ".si.unfold.true"), EndBlock->getParent(), EndBlock); diff --git a/llvm/test/Transforms/DFAJumpThreading/dfa-jump-threading-transform.ll b/llvm/test/Transforms/DFAJumpThreading/dfa-jump-threading-transform.ll index c38f81d0f046e..cba1ba8dde768 100644 --- a/llvm/test/Transforms/DFAJumpThreading/dfa-jump-threading-transform.ll +++ b/llvm/test/Transforms/DFAJumpThreading/dfa-jump-threading-transform.ll @@ -300,3 +300,126 @@ define void @self-reference() { end: ret void } + +define void @pr106083_invalidBBarg_fold(i1 %cmp1, i1 %cmp2, i1 %not, ptr %d) { +; CHECK-LABEL: @pr106083_invalidBBarg_fold( +; CHECK-NEXT: bb: +; CHECK-NEXT: br i1 [[CMP1:%.*]], label [[BB1:%.*]], label [[SEL_SI_UNFOLD_FALSE:%.*]] +; CHECK: sel.si.unfold.false: +; CHECK-NEXT: [[DOTSI_UNFOLD_PHI1:%.*]] = phi i32 [ 1, [[BB:%.*]] ] +; CHECK-NEXT: br label [[BB1]] +; CHECK: BB1: +; CHECK-NEXT: [[I:%.*]] = phi i16 [ 0, [[BB1_BACKEDGE:%.*]] ], [ 0, [[BB]] ], [ 1, [[BB7:%.*]] ], [ 0, [[SEL_SI_UNFOLD_FALSE]] ], [ 1, [[BB7_JT0:%.*]] ] +; CHECK-NEXT: [[SEL_SI_UNFOLD_PHI:%.*]] = phi i32 [ [[SEL_SI_UNFOLD_PHI]], [[BB1_BACKEDGE]] ], [ [[SEL_SI_UNFOLD_PHI]], [[BB7]] ], [ 0, [[BB]] ], [ [[DOTSI_UNFOLD_PHI1]], [[SEL_SI_UNFOLD_FALSE]] ], [ [[SEL_SI_UNFOLD_PHI]], [[BB7_JT0]] ] +; CHECK-NEXT: br i1 [[NOT:%.*]], label [[BB7_JT0]], label [[BB2:%.*]] +; CHECK: BB2: +; CHECK-NEXT: store i16 0, ptr [[D:%.*]], align 2 +; CHECK-NEXT: br i1 [[CMP2:%.*]], label [[BB7]], label [[SPEC_SELECT_SI_UNFOLD_FALSE_JT0:%.*]] +; CHECK: spec.select.si.unfold.false: +; CHECK-NEXT: br label [[BB7]] +; CHECK: spec.select.si.unfold.false.jt0: +; CHECK-NEXT: [[DOTSI_UNFOLD_PHI_JT0:%.*]] = phi i32 [ 0, [[BB2]] ] +; CHECK-NEXT: br label [[BB7_JT0]] +; CHECK: BB7: +; CHECK-NEXT: [[D_PROMOTED4:%.*]] = phi i16 [ 1, [[BB2]] ], [ 1, [[SPEC_SELECT_SI_UNFOLD_FALSE:%.*]] ] +; CHECK-NEXT: [[_3:%.*]] = phi i32 [ [[SEL_SI_UNFOLD_PHI]], [[BB2]] ], [ poison, [[SPEC_SELECT_SI_UNFOLD_FALSE]] ] +; CHECK-NEXT: switch i32 [[_3]], label [[BB1_BACKEDGE]] [ +; CHECK-NEXT: i32 0, label [[BB1]] +; CHECK-NEXT: i32 1, label [[BB8:%.*]] +; CHECK-NEXT: ] +; CHECK: BB7.jt0: +; CHECK-NEXT: [[D_PROMOTED4_JT0:%.*]] = phi i16 [ 0, [[BB1]] ], [ 1, [[SPEC_SELECT_SI_UNFOLD_FALSE_JT0]] ] +; CHECK-NEXT: [[_3_JT0:%.*]] = phi i32 [ 0, [[BB1]] ], [ [[DOTSI_UNFOLD_PHI_JT0]], [[SPEC_SELECT_SI_UNFOLD_FALSE_JT0]] ] +; CHECK-NEXT: br label [[BB1]] +; CHECK: BB1.backedge: +; CHECK-NEXT: br label [[BB1]] +; CHECK: BB8: +; CHECK-NEXT: ret void +; +bb: + %sel = select i1 %cmp1, i32 0, i32 1 + br label %BB1 + +BB1: ; preds = %BB1.backedge, %BB7, %bb + %i = phi i16 [ 0, %BB1.backedge ], [ 0, %bb ], [ 1, %BB7 ] + br i1 %not, label %BB7, label %BB2 + +BB2: ; preds = %BB1 + store i16 0, ptr %d, align 2 + %spec.select = select i1 %cmp2, i32 %sel, i32 0 + br label %BB7 + +BB7: ; preds = %BB2, %BB1 + %d.promoted4 = phi i16 [ 0, %BB1 ], [ 1, %BB2 ] + %_3 = phi i32 [ 0, %BB1 ], [ %spec.select, %BB2 ] + switch i32 %_3, label %BB1.backedge [ + i32 0, label %BB1 + i32 1, label %BB8 + ] + +BB1.backedge: ; preds = %BB7 + br label %BB1 + +BB8: ; preds = %BB7 + ret void +} + +define void @pr106083_select_dead_uses(i1 %cmp1, i1 %not, ptr %p) { +; CHECK-LABEL: 
@pr106083_select_dead_uses( +; CHECK-NEXT: bb: +; CHECK-NEXT: br i1 [[CMP1:%.*]], label [[DOTLOOPEXIT6:%.*]], label [[SPEC_SELECT_SI_UNFOLD_FALSE:%.*]] +; CHECK: spec.select.si.unfold.false: +; CHECK-NEXT: [[DOTSI_UNFOLD_PHI1:%.*]] = phi i32 [ 1, [[BB:%.*]] ] +; CHECK-NEXT: br label [[DOTLOOPEXIT6]] +; CHECK: .loopexit6: +; CHECK-NEXT: [[SPEC_SELECT_SI_UNFOLD_PHI:%.*]] = phi i32 [ [[SPEC_SELECT_SI_UNFOLD_PHI]], [[SELECT_UNFOLD:%.*]] ], [ 0, [[BB]] ], [ [[DOTSI_UNFOLD_PHI1]], [[SPEC_SELECT_SI_UNFOLD_FALSE]] ] +; CHECK-NEXT: br i1 [[NOT:%.*]], label [[SELECT_UNFOLD_JT0:%.*]], label [[BB1:%.*]] +; CHECK: bb1: +; CHECK-NEXT: [[I:%.*]] = load i32, ptr [[P:%.*]], align 4 +; CHECK-NEXT: [[NOT2:%.*]] = icmp eq i32 0, 0 +; CHECK-NEXT: br i1 [[NOT2]], label [[SELECT_UNFOLD]], label [[SPEC_SELECT7_SI_UNFOLD_FALSE_JT0:%.*]] +; CHECK: spec.select7.si.unfold.false: +; CHECK-NEXT: br label [[SELECT_UNFOLD]] +; CHECK: spec.select7.si.unfold.false.jt0: +; CHECK-NEXT: [[DOTSI_UNFOLD_PHI_JT0:%.*]] = phi i32 [ 0, [[BB1]] ] +; CHECK-NEXT: br label [[SELECT_UNFOLD_JT0]] +; CHECK: select.unfold: +; CHECK-NEXT: [[_2:%.*]] = phi i32 [ [[SPEC_SELECT_SI_UNFOLD_PHI]], [[BB1]] ], [ poison, [[SPEC_SELECT7_SI_UNFOLD_FALSE:%.*]] ] +; CHECK-NEXT: switch i32 [[_2]], label [[BB2:%.*]] [ +; CHECK-NEXT: i32 0, label [[DOTPREHEADER_PREHEADER:%.*]] +; CHECK-NEXT: i32 1, label [[DOTLOOPEXIT6]] +; CHECK-NEXT: ] +; CHECK: select.unfold.jt0: +; CHECK-NEXT: [[_2_JT0:%.*]] = phi i32 [ 0, [[DOTLOOPEXIT6]] ], [ [[DOTSI_UNFOLD_PHI_JT0]], [[SPEC_SELECT7_SI_UNFOLD_FALSE_JT0]] ] +; CHECK-NEXT: br label [[DOTPREHEADER_PREHEADER]] +; CHECK: .preheader.preheader: +; CHECK-NEXT: ret void +; CHECK: bb2: +; CHECK-NEXT: unreachable +; +bb: + %spec.select = select i1 %cmp1, i32 0, i32 1 + br label %.loopexit6 + +.loopexit6: ; preds = %select.unfold, %bb + br i1 %not, label %select.unfold, label %bb1 + +bb1: ; preds = %.loopexit6 + %i = load i32, ptr %p, align 4 + %not2 = icmp eq i32 0, 0 + %spec.select7 = select i1 %not2, i32 %spec.select, i32 0 + br label %select.unfold + +select.unfold: ; preds = %bb1, %.loopexit6 + %_2 = phi i32 [ 0, %.loopexit6 ], [ %spec.select7, %bb1 ] + switch i32 %_2, label %bb2 [ + i32 0, label %.preheader.preheader + i32 1, label %.loopexit6 + ] + +.preheader.preheader: ; preds = %select.unfold + ret void + +bb2: ; preds = %select.unfold + unreachable +} From 679c9717dfc9687a3bca78b45d9fd104b67e16f9 Mon Sep 17 00:00:00 2001 From: Timm Baeder Date: Tue, 24 Sep 2024 18:07:18 +0200 Subject: [PATCH 014/212] [clang][bytecode] Fix vector shifts on big-endian systems (#109800) For shifts, the LHS and RHS element types might be different. The variable naming here could probably use some love now, but I'm trying to fix this as fast as possible. See the discussion in https://github.com/llvm/llvm-project/pull/108949 --- clang/lib/AST/ByteCode/Compiler.cpp | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/clang/lib/AST/ByteCode/Compiler.cpp b/clang/lib/AST/ByteCode/Compiler.cpp index 754cd0db9868b..785918846976d 100644 --- a/clang/lib/AST/ByteCode/Compiler.cpp +++ b/clang/lib/AST/ByteCode/Compiler.cpp @@ -1282,9 +1282,8 @@ bool Compiler::VisitVectorBinOp(const BinaryOperator *E) { ? BinaryOperator::getOpForCompoundAssignment(E->getOpcode()) : E->getOpcode(); - // The LHS and RHS of a comparison operator must have the same type. So we - // just use LHS vector element type here. 
PrimType ElemT = this->classifyVectorElementType(LHS->getType());
+  PrimType RHSElemT = this->classifyVectorElementType(RHS->getType());
   PrimType ResultElemT = this->classifyVectorElementType(E->getType());
 
   // Evaluate LHS and save value to LHSOffset.
@@ -1312,7 +1311,7 @@ bool Compiler::VisitVectorBinOp(const BinaryOperator *E) {
   PrimType PromotT = classifyPrim(PromotTy);
   PrimType OpT = NeedIntPromot ? PromotT : ElemT;
 
-  auto getElem = [=](unsigned Offset, unsigned Index) {
+  auto getElem = [=](unsigned Offset, PrimType ElemT, unsigned Index) {
     if (!this->emitGetLocal(PT_Ptr, Offset, E))
       return false;
     if (!this->emitArrayElemPop(ElemT, Index, E))
@@ -1342,9 +1341,9 @@ bool Compiler::VisitVectorBinOp(const BinaryOperator *E) {
   }
 
   for (unsigned I = 0; I != VecTy->getNumElements(); ++I) {
-    if (!getElem(LHSOffset, I))
+    if (!getElem(LHSOffset, ElemT, I))
       return false;
-    if (!getElem(RHSOffset, I))
+    if (!getElem(RHSOffset, RHSElemT, I))
       return false;
     switch (Op) {
     case BO_Add:
@@ -1372,11 +1371,11 @@ bool Compiler::VisitVectorBinOp(const BinaryOperator *E) {
       return false;
     break;
   case BO_Shl:
-    if (!this->emitShl(OpT, ElemT, E))
+    if (!this->emitShl(OpT, RHSElemT, E))
       return false;
     break;
   case BO_Shr:
-    if (!this->emitShr(OpT, ElemT, E))
+    if (!this->emitShr(OpT, RHSElemT, E))
       return false;
     break;
   case BO_EQ:
From 206408732bca2ef464732a39c8319d47c8a1dbea Mon Sep 17 00:00:00 2001
From: Jason Molenda
Date: Tue, 24 Sep 2024 09:08:08 -0700
Subject: [PATCH 015/212] [lldb][NFC] Add a missing setter method for
 UnwindPlans (#109751)

The UnwindPlan class has getter and setter methods for specifying an
abstract register location, but it doesn't have a setter for a DWARF
Expression register location. This hasn't been needed for any of the
UnwindPlans that we do in mainline lldb yet, but it is used in the Swift
language SwiftLanguageRuntime plugin, which creates UnwindPlans for
async functions.

While it's currently unused on the main branch, this is a
straightforward setter in the same form as the others, the only caveat
being that it doesn't own the DWARF expression bytes; it only holds a
pointer to them.
---
 lldb/include/lldb/Symbol/UnwindPlan.h | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/lldb/include/lldb/Symbol/UnwindPlan.h b/lldb/include/lldb/Symbol/UnwindPlan.h
index a1d00f2d2c0cd..e1567c7357d0b 100644
--- a/lldb/include/lldb/Symbol/UnwindPlan.h
+++ b/lldb/include/lldb/Symbol/UnwindPlan.h
@@ -370,6 +370,13 @@ class UnwindPlan {
 
   bool SetRegisterLocationToSame(uint32_t reg_num, bool must_replace);
 
+  /// This method does not make a copy of the \a opcodes memory; it is
+  /// assumed to have the same lifetime as the Module this UnwindPlan will
+  /// be registered in.
+ bool SetRegisterLocationToIsDWARFExpression(uint32_t reg_num, + const uint8_t *opcodes, + uint32_t len, bool can_replace); + bool SetRegisterLocationToIsConstant(uint32_t reg_num, uint64_t constant, bool can_replace); From 7773243d9916f98ba0ffce0c3a960e4aa9f03e81 Mon Sep 17 00:00:00 2001 From: Elvina Yakubova Date: Tue, 24 Sep 2024 17:10:47 +0100 Subject: [PATCH 016/212] [SLP] Move more X86 tests to common directory (#109821) Some of the tests from the X86 directory can be generalized to improve coverage for other architectures --- .../SLPVectorizer/{X86 => }/peek-through-shuffle.ll | 3 ++- .../SLPVectorizer/{X86 => }/phi-node-bitwidt-op-not.ll | 3 ++- .../Transforms/SLPVectorizer/{X86 => }/phi-undef-input.ll | 3 ++- .../Transforms/SLPVectorizer/{X86 => }/postponed_gathers.ll | 3 ++- .../SLPVectorizer/{X86 => }/pr31599-inseltpoison.ll | 3 ++- llvm/test/Transforms/SLPVectorizer/{X86 => }/pr31599.ll | 3 ++- .../{X86 => }/reduction-gather-non-scheduled-extracts.ll | 3 ++- .../SLPVectorizer/{X86 => }/reduction-modified-values.ll | 3 ++- .../SLPVectorizer/{X86 => }/reorder-clustered-node.ll | 3 ++- .../SLPVectorizer/{X86 => }/reordered-top-scalars.ll | 3 ++- .../SLPVectorizer/{X86 => }/reordering-single-phi.ll | 3 ++- .../{X86 => }/reused-buildvector-matching-vectorized-node.ll | 3 ++- .../SLPVectorizer/{X86 => }/root-trunc-extract-reuse.ll | 3 ++- .../{X86 => }/same-scalar-in-same-phi-extract.ll | 3 ++- .../Transforms/SLPVectorizer/{X86 => }/scalarazied-result.ll | 3 ++- .../SLPVectorizer/{X86 => }/scalarization-overhead.ll | 3 ++- .../SLPVectorizer/{X86 => }/shrink_after_reorder2.ll | 3 ++- .../SLPVectorizer/{X86 => }/shuffle-multivector.ll | 3 ++- .../Transforms/SLPVectorizer/{X86 => }/shufflebuilder-bug.ll | 3 ++- .../Transforms/SLPVectorizer/{X86 => }/stores-non-ordered.ll | 3 ++- .../Transforms/SLPVectorizer/{X86 => }/unknown-entries.ll | 5 ++--- .../SLPVectorizer/{X86 => }/zext-incoming-for-neg-icmp.ll | 3 ++- 22 files changed, 44 insertions(+), 24 deletions(-) rename llvm/test/Transforms/SLPVectorizer/{X86 => }/peek-through-shuffle.ll (85%) rename llvm/test/Transforms/SLPVectorizer/{X86 => }/phi-node-bitwidt-op-not.ll (94%) rename llvm/test/Transforms/SLPVectorizer/{X86 => }/phi-undef-input.ll (96%) rename llvm/test/Transforms/SLPVectorizer/{X86 => }/postponed_gathers.ll (90%) rename llvm/test/Transforms/SLPVectorizer/{X86 => }/pr31599-inseltpoison.ll (78%) rename llvm/test/Transforms/SLPVectorizer/{X86 => }/pr31599.ll (78%) rename llvm/test/Transforms/SLPVectorizer/{X86 => }/reduction-gather-non-scheduled-extracts.ll (85%) rename llvm/test/Transforms/SLPVectorizer/{X86 => }/reduction-modified-values.ll (83%) rename llvm/test/Transforms/SLPVectorizer/{X86 => }/reorder-clustered-node.ll (93%) rename llvm/test/Transforms/SLPVectorizer/{X86 => }/reordered-top-scalars.ll (83%) rename llvm/test/Transforms/SLPVectorizer/{X86 => }/reordering-single-phi.ll (93%) rename llvm/test/Transforms/SLPVectorizer/{X86 => }/reused-buildvector-matching-vectorized-node.ll (94%) rename llvm/test/Transforms/SLPVectorizer/{X86 => }/root-trunc-extract-reuse.ll (86%) rename llvm/test/Transforms/SLPVectorizer/{X86 => }/same-scalar-in-same-phi-extract.ll (88%) rename llvm/test/Transforms/SLPVectorizer/{X86 => }/scalarazied-result.ll (60%) rename llvm/test/Transforms/SLPVectorizer/{X86 => }/scalarization-overhead.ll (92%) rename llvm/test/Transforms/SLPVectorizer/{X86 => }/shrink_after_reorder2.ll (91%) rename llvm/test/Transforms/SLPVectorizer/{X86 => }/shuffle-multivector.ll (89%) rename 
llvm/test/Transforms/SLPVectorizer/{X86 => }/shufflebuilder-bug.ll (89%) rename llvm/test/Transforms/SLPVectorizer/{X86 => }/stores-non-ordered.ll (92%) rename llvm/test/Transforms/SLPVectorizer/{X86 => }/unknown-entries.ll (82%) rename llvm/test/Transforms/SLPVectorizer/{X86 => }/zext-incoming-for-neg-icmp.ll (89%) diff --git a/llvm/test/Transforms/SLPVectorizer/X86/peek-through-shuffle.ll b/llvm/test/Transforms/SLPVectorizer/peek-through-shuffle.ll similarity index 85% rename from llvm/test/Transforms/SLPVectorizer/X86/peek-through-shuffle.ll rename to llvm/test/Transforms/SLPVectorizer/peek-through-shuffle.ll index c157f6117df95..839c1ebed6bcf 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/peek-through-shuffle.ll +++ b/llvm/test/Transforms/SLPVectorizer/peek-through-shuffle.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -S -passes=slp-vectorizer < %s -mtriple=x86_64-unknown-linux-gnu -o - | FileCheck %s +; RUN: %if x86-registered-target %{ opt -S -passes=slp-vectorizer < %s -mtriple=x86_64-unknown-linux-gnu -o - | FileCheck %s %} +; RUN: %if aarch64-registered-target %{ opt -S -passes=slp-vectorizer < %s -mtriple=aarch64-unknown-linux-gnu -o - | FileCheck %s %} define void @foo(ptr %0, <4 x float> %1) { ; CHECK-LABEL: @foo( diff --git a/llvm/test/Transforms/SLPVectorizer/X86/phi-node-bitwidt-op-not.ll b/llvm/test/Transforms/SLPVectorizer/phi-node-bitwidt-op-not.ll similarity index 94% rename from llvm/test/Transforms/SLPVectorizer/X86/phi-node-bitwidt-op-not.ll rename to llvm/test/Transforms/SLPVectorizer/phi-node-bitwidt-op-not.ll index f376ca71c7769..2037e0d67d2f8 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/phi-node-bitwidt-op-not.ll +++ b/llvm/test/Transforms/SLPVectorizer/phi-node-bitwidt-op-not.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 -; RUN: opt -S -passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s +; RUN: %if x86-registered-target %{ opt -S -passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s %} +; RUN: %if aarch64-registered-target %{ opt -S -passes=slp-vectorizer -mtriple=aarch64-unknown-linux-gnu < %s | FileCheck %s %} define i32 @test(ptr %b, ptr %c, i32 %0, ptr %a, i1 %tobool3.not) { ; CHECK-LABEL: define i32 @test( diff --git a/llvm/test/Transforms/SLPVectorizer/X86/phi-undef-input.ll b/llvm/test/Transforms/SLPVectorizer/phi-undef-input.ll similarity index 96% rename from llvm/test/Transforms/SLPVectorizer/X86/phi-undef-input.ll rename to llvm/test/Transforms/SLPVectorizer/phi-undef-input.ll index 3cc32c1fc7b28..b9802a0adb8aa 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/phi-undef-input.ll +++ b/llvm/test/Transforms/SLPVectorizer/phi-undef-input.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -passes=slp-vectorizer -slp-threshold=-1000 -mtriple=x86_64 -S | FileCheck %s +; RUN: %if x86-registered-target %{ opt < %s -passes=slp-vectorizer -slp-threshold=-1000 -mtriple=x86_64 -S | FileCheck %s %} +; RUN: %if aarch64-registered-target %{ opt < %s -passes=slp-vectorizer -slp-threshold=-1000 -mtriple=aarch64-unknown-linux-gnu -S | FileCheck %s %} ; The inputs to vector phi should remain undef. 
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/postponed_gathers.ll b/llvm/test/Transforms/SLPVectorizer/postponed_gathers.ll similarity index 90% rename from llvm/test/Transforms/SLPVectorizer/X86/postponed_gathers.ll rename to llvm/test/Transforms/SLPVectorizer/postponed_gathers.ll index 488ca0b23cd9c..f6bed797b9ba9 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/postponed_gathers.ll +++ b/llvm/test/Transforms/SLPVectorizer/postponed_gathers.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2 -; RUN: opt < %s -passes=slp-vectorizer -slp-threshold=-10 -mtriple=x86_64-unknown-linux-gnu -S | FileCheck %s +; RUN: %if x86-registered-target %{ opt < %s -passes=slp-vectorizer -slp-threshold=-10 -mtriple=x86_64-unknown-linux-gnu -S | FileCheck %s %} +; RUN: %if aarch64-registered-target %{ opt < %s -passes=slp-vectorizer -slp-threshold=-10 -mtriple=aarch64-unknown-linux-gnu -S | FileCheck %s %} define void @foo() { ; CHECK-LABEL: define void @foo() { diff --git a/llvm/test/Transforms/SLPVectorizer/X86/pr31599-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/pr31599-inseltpoison.ll similarity index 78% rename from llvm/test/Transforms/SLPVectorizer/X86/pr31599-inseltpoison.ll rename to llvm/test/Transforms/SLPVectorizer/pr31599-inseltpoison.ll index 5506f61fe134b..fe5871d73cd5e 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/pr31599-inseltpoison.ll +++ b/llvm/test/Transforms/SLPVectorizer/pr31599-inseltpoison.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -passes=slp-vectorizer -S -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s +; RUN: %if x86-registered-target %{ opt -passes=slp-vectorizer -S -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s %} +; RUN: %if aarch64-registered-target %{ opt -passes=slp-vectorizer -S -mtriple=aarch64-unknown-linux-gnu < %s | FileCheck %s %} define <2 x float> @foo() { ; CHECK-LABEL: @foo( diff --git a/llvm/test/Transforms/SLPVectorizer/X86/pr31599.ll b/llvm/test/Transforms/SLPVectorizer/pr31599.ll similarity index 78% rename from llvm/test/Transforms/SLPVectorizer/X86/pr31599.ll rename to llvm/test/Transforms/SLPVectorizer/pr31599.ll index 348656e07c6be..10b9b224d556e 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/pr31599.ll +++ b/llvm/test/Transforms/SLPVectorizer/pr31599.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -passes=slp-vectorizer -S -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s +; RUN: %if x86-registered-target %{ opt -passes=slp-vectorizer -S -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s %} +; RUN: %if aarch64-registered-target %{ opt -passes=slp-vectorizer -S -mtriple=aarch64-unknown-linux-gnu < %s | FileCheck %s %} define <2 x float> @foo() { ; CHECK-LABEL: @foo( diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reduction-gather-non-scheduled-extracts.ll b/llvm/test/Transforms/SLPVectorizer/reduction-gather-non-scheduled-extracts.ll similarity index 85% rename from llvm/test/Transforms/SLPVectorizer/X86/reduction-gather-non-scheduled-extracts.ll rename to llvm/test/Transforms/SLPVectorizer/reduction-gather-non-scheduled-extracts.ll index 03c8767eff327..f1034f3971135 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/reduction-gather-non-scheduled-extracts.ll +++ b/llvm/test/Transforms/SLPVectorizer/reduction-gather-non-scheduled-extracts.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by 
utils/update_test_checks.py UTC_ARGS: --version 3 -; RUN: opt -S --passes=slp-vectorizer -mtriple=x86_64-sie-ps5 < %s | FileCheck %s +; RUN: %if x86-registered-target %{ opt -S --passes=slp-vectorizer -mtriple=x86_64-sie-ps5 < %s | FileCheck %s %} +; RUN: %if aarch64-registered-target %{ opt -S --passes=slp-vectorizer -mtriple=aarch64-unknown-linux-gnu < %s | FileCheck %s %} define void @tes() { ; CHECK-LABEL: define void @tes() { diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reduction-modified-values.ll b/llvm/test/Transforms/SLPVectorizer/reduction-modified-values.ll similarity index 83% rename from llvm/test/Transforms/SLPVectorizer/X86/reduction-modified-values.ll rename to llvm/test/Transforms/SLPVectorizer/reduction-modified-values.ll index dbf490c5fe6a2..be9318e467174 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/reduction-modified-values.ll +++ b/llvm/test/Transforms/SLPVectorizer/reduction-modified-values.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -S --passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s +; RUN: %if x86-registered-target %{ opt -S --passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s %} +; RUN: %if aarch64-registered-target %{ opt -S --passes=slp-vectorizer -mtriple=aarch64-unknown-linux-gnu < %s | FileCheck %s %} define i32 @test() { ; CHECK-LABEL: @test( diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reorder-clustered-node.ll b/llvm/test/Transforms/SLPVectorizer/reorder-clustered-node.ll similarity index 93% rename from llvm/test/Transforms/SLPVectorizer/X86/reorder-clustered-node.ll rename to llvm/test/Transforms/SLPVectorizer/reorder-clustered-node.ll index 1a6ff2385905b..561182d5e4f49 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/reorder-clustered-node.ll +++ b/llvm/test/Transforms/SLPVectorizer/reorder-clustered-node.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -passes=slp-vectorizer -S < %s -mtriple=x86_64 -slp-threshold=-150 | FileCheck %s +; RUN: %if x86-registered-target %{ opt -passes=slp-vectorizer -S < %s -mtriple=x86_64 -slp-threshold=-150 | FileCheck %s %} +; RUN: %if aarch64-registered-target %{ opt -passes=slp-vectorizer -S < %s -mtriple=aarch64-unknown-linux-gnu -slp-threshold=-150 | FileCheck %s %} define i1 @test(ptr %arg, ptr %i233, i64 %i241, ptr %i235, ptr %i237, ptr %i227) { ; CHECK-LABEL: @test( diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reordered-top-scalars.ll b/llvm/test/Transforms/SLPVectorizer/reordered-top-scalars.ll similarity index 83% rename from llvm/test/Transforms/SLPVectorizer/X86/reordered-top-scalars.ll rename to llvm/test/Transforms/SLPVectorizer/reordered-top-scalars.ll index 4517d27598b60..1de5ee2298837 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/reordered-top-scalars.ll +++ b/llvm/test/Transforms/SLPVectorizer/reordered-top-scalars.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -S --passes=slp-vectorizer -mtriple=x86_64-unknown %s -slp-threshold=-5 | FileCheck %s +; RUN: %if x86-registered-target %{ opt -S --passes=slp-vectorizer -mtriple=x86_64-unknown %s -slp-threshold=-5 | FileCheck %s %} +; RUN: %if aarch64-registered-target %{ opt -S --passes=slp-vectorizer -mtriple=aarch64-unknown %s -slp-threshold=-5 | FileCheck %s %} define i32 @test(ptr %isec) { ; CHECK-LABEL: @test( diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reordering-single-phi.ll 
b/llvm/test/Transforms/SLPVectorizer/reordering-single-phi.ll similarity index 93% rename from llvm/test/Transforms/SLPVectorizer/X86/reordering-single-phi.ll rename to llvm/test/Transforms/SLPVectorizer/reordering-single-phi.ll index bc1eaaac5d1bb..a70daf9cf8d60 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/reordering-single-phi.ll +++ b/llvm/test/Transforms/SLPVectorizer/reordering-single-phi.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 -; RUN: opt -S --passes=slp-vectorizer -mtriple=x86_64-unknown-linux < %s | FileCheck %s +; RUN: %if x86-registered-target %{ opt -S --passes=slp-vectorizer -mtriple=x86_64-unknown-linux < %s | FileCheck %s %} +; RUN: %if aarch64-registered-target %{ opt -S --passes=slp-vectorizer -mtriple=aarch64-unknown-linux < %s | FileCheck %s %} @a = external global [32000 x float], align 64 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reused-buildvector-matching-vectorized-node.ll b/llvm/test/Transforms/SLPVectorizer/reused-buildvector-matching-vectorized-node.ll similarity index 94% rename from llvm/test/Transforms/SLPVectorizer/X86/reused-buildvector-matching-vectorized-node.ll rename to llvm/test/Transforms/SLPVectorizer/reused-buildvector-matching-vectorized-node.ll index 2b425ee624700..3e00550a88521 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/reused-buildvector-matching-vectorized-node.ll +++ b/llvm/test/Transforms/SLPVectorizer/reused-buildvector-matching-vectorized-node.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2 -; RUN: opt -S -passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s +; RUN: %if x86-registered-target %{ opt -S -passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s %} +; RUN: %if aarch64-registered-target %{ opt -S -passes=slp-vectorizer -mtriple=aarch64-unknown-linux-gnu < %s | FileCheck %s %} define void @blam(ptr %arg, double %load2, i1 %fcmp3) { ; CHECK-LABEL: define void @blam diff --git a/llvm/test/Transforms/SLPVectorizer/X86/root-trunc-extract-reuse.ll b/llvm/test/Transforms/SLPVectorizer/root-trunc-extract-reuse.ll similarity index 86% rename from llvm/test/Transforms/SLPVectorizer/X86/root-trunc-extract-reuse.ll rename to llvm/test/Transforms/SLPVectorizer/root-trunc-extract-reuse.ll index af46b4f576234..34c068478c5f5 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/root-trunc-extract-reuse.ll +++ b/llvm/test/Transforms/SLPVectorizer/root-trunc-extract-reuse.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -passes=slp-vectorizer -S -slp-threshold=-100 -mtriple=x86_64 < %s | FileCheck %s +; RUN: %if x86-registered-target %{ opt -passes=slp-vectorizer -S -slp-threshold=-100 -mtriple=x86_64 < %s | FileCheck %s %} +; RUN: %if aarch64-registered-target %{ opt -passes=slp-vectorizer -S -slp-threshold=-100 -mtriple=aarch64-unknown-linux-gnu < %s | FileCheck %s %} define i1 @test() { ; CHECK-LABEL: @test( diff --git a/llvm/test/Transforms/SLPVectorizer/X86/same-scalar-in-same-phi-extract.ll b/llvm/test/Transforms/SLPVectorizer/same-scalar-in-same-phi-extract.ll similarity index 88% rename from llvm/test/Transforms/SLPVectorizer/X86/same-scalar-in-same-phi-extract.ll rename to llvm/test/Transforms/SLPVectorizer/same-scalar-in-same-phi-extract.ll index f1be11d0d0fc5..fe0813542f309 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/same-scalar-in-same-phi-extract.ll +++ 
b/llvm/test/Transforms/SLPVectorizer/same-scalar-in-same-phi-extract.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 -; RUN: opt -S --passes=slp-vectorizer -slp-threshold=-99999 -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s +; RUN: %if x86-registered-target %{ opt -S --passes=slp-vectorizer -slp-threshold=-99999 -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s %} +; RUN: %if aarch64-registered-target %{ opt -S --passes=slp-vectorizer -slp-threshold=-99999 -mtriple=aarch64-unknown-linux-gnu < %s | FileCheck %s %} define void @test(i32 %arg) { ; CHECK-LABEL: define void @test( diff --git a/llvm/test/Transforms/SLPVectorizer/X86/scalarazied-result.ll b/llvm/test/Transforms/SLPVectorizer/scalarazied-result.ll similarity index 60% rename from llvm/test/Transforms/SLPVectorizer/X86/scalarazied-result.ll rename to llvm/test/Transforms/SLPVectorizer/scalarazied-result.ll index 1d6e191c6f97b..2570cdb45e1e7 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/scalarazied-result.ll +++ b/llvm/test/Transforms/SLPVectorizer/scalarazied-result.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu -S < %s | FileCheck %s +; RUN: %if x86-registered-target %{ opt -passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu -S < %s | FileCheck %s %} +; RUN: %if aarch64-registered-target %{ opt -passes=slp-vectorizer -mtriple=aarch64-unknown-linux-gnu -S < %s | FileCheck %s %} define void @test() { ; CHECK-LABEL: @test( diff --git a/llvm/test/Transforms/SLPVectorizer/X86/scalarization-overhead.ll b/llvm/test/Transforms/SLPVectorizer/scalarization-overhead.ll similarity index 92% rename from llvm/test/Transforms/SLPVectorizer/X86/scalarization-overhead.ll rename to llvm/test/Transforms/SLPVectorizer/scalarization-overhead.ll index 55e155840f858..9f6b285f1ab90 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/scalarization-overhead.ll +++ b/llvm/test/Transforms/SLPVectorizer/scalarization-overhead.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -mtriple=x86_64-- -passes=slp-vectorizer -S < %s | FileCheck %s +; RUN: %if x86-registered-target %{ opt -mtriple=x86_64-- -passes=slp-vectorizer -S < %s | FileCheck %s %} +; RUN: %if aarch64-registered-target %{ opt -mtriple=aarch64-- -passes=slp-vectorizer -S < %s | FileCheck %s %} ; Crash Test case reported on D134605 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/shrink_after_reorder2.ll b/llvm/test/Transforms/SLPVectorizer/shrink_after_reorder2.ll similarity index 91% rename from llvm/test/Transforms/SLPVectorizer/X86/shrink_after_reorder2.ll rename to llvm/test/Transforms/SLPVectorizer/shrink_after_reorder2.ll index 9e3ba05f88da8..2f0bd4a8f1315 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/shrink_after_reorder2.ll +++ b/llvm/test/Transforms/SLPVectorizer/shrink_after_reorder2.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -S -o - -passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s +; RUN: %if x86-registered-target %{ opt -S -o - -passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s %} +; RUN: %if aarch64-registered-target %{ opt -S -o - -passes=slp-vectorizer -mtriple=aarch64-unknown-linux-gnu < %s | FileCheck %s %} %class.e = type { i32, i32 } %struct.a = type { i32, i32, i32, i32 } diff --git 
a/llvm/test/Transforms/SLPVectorizer/X86/shuffle-multivector.ll b/llvm/test/Transforms/SLPVectorizer/shuffle-multivector.ll similarity index 89% rename from llvm/test/Transforms/SLPVectorizer/X86/shuffle-multivector.ll rename to llvm/test/Transforms/SLPVectorizer/shuffle-multivector.ll index c2555889f5981..2253c70dc2501 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/shuffle-multivector.ll +++ b/llvm/test/Transforms/SLPVectorizer/shuffle-multivector.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -passes=slp-vectorizer -S < %s -mtriple=x86_64-unknown-linux -slp-threshold=-163 | FileCheck %s +; RUN: %if x86-registered-target %{ opt -passes=slp-vectorizer -S < %s -mtriple=x86_64-unknown-linux -slp-threshold=-163 | FileCheck %s %} +; RUN: %if aarch64-registered-target %{ opt -passes=slp-vectorizer -S < %s -mtriple=aarch64-unknown-linux -slp-threshold=-163 | FileCheck %s %} define void @test1(i128 %p0, i128 %p1, i128 %p2, i128 %p3, <4 x i128> %vec) { ; CHECK-LABEL: @test1( diff --git a/llvm/test/Transforms/SLPVectorizer/X86/shufflebuilder-bug.ll b/llvm/test/Transforms/SLPVectorizer/shufflebuilder-bug.ll similarity index 89% rename from llvm/test/Transforms/SLPVectorizer/X86/shufflebuilder-bug.ll rename to llvm/test/Transforms/SLPVectorizer/shufflebuilder-bug.ll index 9db7d696c7c7e..019c9eadd7c09 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/shufflebuilder-bug.ll +++ b/llvm/test/Transforms/SLPVectorizer/shufflebuilder-bug.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2 -; RUN: opt -S -p slp-vectorizer -mtriple=x86_64-- %s | FileCheck %s +; RUN: %if x86-registered-target %{ opt -S -p slp-vectorizer -mtriple=x86_64-- %s | FileCheck %s %} +; RUN: %if aarch64-registered-target %{ opt -S -p slp-vectorizer -mtriple=aarch64-unknown-linux-gnu %s | FileCheck %s %} define void @foo(<4 x float> %vec, float %val, ptr %ptr) { ; CHECK-LABEL: define void @foo diff --git a/llvm/test/Transforms/SLPVectorizer/X86/stores-non-ordered.ll b/llvm/test/Transforms/SLPVectorizer/stores-non-ordered.ll similarity index 92% rename from llvm/test/Transforms/SLPVectorizer/X86/stores-non-ordered.ll rename to llvm/test/Transforms/SLPVectorizer/stores-non-ordered.ll index a9748ca6291ae..aaa6be73056bd 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/stores-non-ordered.ll +++ b/llvm/test/Transforms/SLPVectorizer/stores-non-ordered.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -S -mtriple=x86_64-unknown -passes=slp-vectorizer -slp-min-reg-size=64 -slp-threshold=-1000 | FileCheck %s +; RUN: %if x86-registered-target %{ opt < %s -S -mtriple=x86_64-unknown -passes=slp-vectorizer -slp-min-reg-size=64 -slp-threshold=-1000 | FileCheck %s %} +; RUN: %if aarch64-registered-target %{ opt < %s -S -mtriple=aarch64-unknown -passes=slp-vectorizer -slp-min-reg-size=64 -slp-threshold=-1000 | FileCheck %s %} define i32 @non-ordered-stores(ptr noalias nocapture %in, ptr noalias nocapture %inn, ptr noalias nocapture %out) { ; CHECK-LABEL: @non-ordered-stores( diff --git a/llvm/test/Transforms/SLPVectorizer/X86/unknown-entries.ll b/llvm/test/Transforms/SLPVectorizer/unknown-entries.ll similarity index 82% rename from llvm/test/Transforms/SLPVectorizer/X86/unknown-entries.ll rename to llvm/test/Transforms/SLPVectorizer/unknown-entries.ll index fc22280c2b8ad..ca9aa451a9a3a 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/unknown-entries.ll +++ 
b/llvm/test/Transforms/SLPVectorizer/unknown-entries.ll @@ -1,7 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 -; RUN: opt < %s -passes=slp-vectorizer -S | FileCheck %s - -target triple = "x86_64-unknown-linux-gnu" +; RUN: %if x86-registered-target %{ opt < %s -passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu -S | FileCheck %s %} +; RUN: %if aarch64-registered-target %{ opt < %s -passes=slp-vectorizer -mtriple=aarch64-unknown-linux-gnu -S | FileCheck %s %} define <3 x i64> @ahyes(i64 %position, i64 %value) { ; CHECK-LABEL: define <3 x i64> @ahyes( diff --git a/llvm/test/Transforms/SLPVectorizer/X86/zext-incoming-for-neg-icmp.ll b/llvm/test/Transforms/SLPVectorizer/zext-incoming-for-neg-icmp.ll similarity index 89% rename from llvm/test/Transforms/SLPVectorizer/X86/zext-incoming-for-neg-icmp.ll rename to llvm/test/Transforms/SLPVectorizer/zext-incoming-for-neg-icmp.ll index 7f086d17ca4c0..89fcc7e983749 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/zext-incoming-for-neg-icmp.ll +++ b/llvm/test/Transforms/SLPVectorizer/zext-incoming-for-neg-icmp.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 -; RUN: opt -S --passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s +; RUN: %if x86-registered-target %{ opt -S --passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s %} +; RUN: %if aarch64-registered-target %{ opt -S --passes=slp-vectorizer -mtriple=aarch64-unknown-linux-gnu < %s | FileCheck %s %} define i32 @test(i32 %a, i8 %b, i8 %c) { ; CHECK-LABEL: define i32 @test( From fe6a3d46aa658fdd1e9a6cbb2031a597a3e59536 Mon Sep 17 00:00:00 2001 From: Joseph Huber Date: Tue, 24 Sep 2024 09:32:42 -0700 Subject: [PATCH 017/212] [libc] Implement the 'rename' function on the GPU (#109814) Summary: Straightforward implementation like the other `stdio.h` functions. 
--- libc/config/gpu/entrypoints.txt | 1 + libc/docs/gpu/support.rst | 1 + libc/include/llvm-libc-types/rpc_opcodes_t.h | 1 + libc/src/stdio/gpu/CMakeLists.txt | 11 +++++++ libc/src/stdio/gpu/rename.cpp | 30 ++++++++++++++++++++ libc/utils/gpu/server/rpc_server.cpp | 18 ++++++++++++ 6 files changed, 62 insertions(+) create mode 100644 libc/src/stdio/gpu/rename.cpp diff --git a/libc/config/gpu/entrypoints.txt b/libc/config/gpu/entrypoints.txt index 9fb89e6fd8d28..b4cfe47f4505f 100644 --- a/libc/config/gpu/entrypoints.txt +++ b/libc/config/gpu/entrypoints.txt @@ -240,6 +240,7 @@ set(TARGET_LIBC_ENTRYPOINTS libc.src.stdio.putchar libc.src.stdio.puts libc.src.stdio.remove + libc.src.stdio.rename libc.src.stdio.stderr libc.src.stdio.stdin libc.src.stdio.stdout diff --git a/libc/docs/gpu/support.rst b/libc/docs/gpu/support.rst index 44c21c7b4c1ff..9c151a5fbac1f 100644 --- a/libc/docs/gpu/support.rst +++ b/libc/docs/gpu/support.rst @@ -240,6 +240,7 @@ fputs |check| |check| fputc |check| |check| fwrite |check| |check| remove |check| |check| +rename |check| |check| putc |check| |check| printf |check| |check| vprintf |check| |check| diff --git a/libc/include/llvm-libc-types/rpc_opcodes_t.h b/libc/include/llvm-libc-types/rpc_opcodes_t.h index 3b388de6888c5..1a6c0cd9bc4a1 100644 --- a/libc/include/llvm-libc-types/rpc_opcodes_t.h +++ b/libc/include/llvm-libc-types/rpc_opcodes_t.h @@ -38,6 +38,7 @@ typedef enum { RPC_PRINTF_TO_STDERR_PACKED, RPC_PRINTF_TO_STREAM_PACKED, RPC_REMOVE, + RPC_RENAME, RPC_SYSTEM, RPC_LAST = 0xFFFF, } rpc_opcode_t; diff --git a/libc/src/stdio/gpu/CMakeLists.txt b/libc/src/stdio/gpu/CMakeLists.txt index 86470b8425e95..9cac42ed71fb7 100644 --- a/libc/src/stdio/gpu/CMakeLists.txt +++ b/libc/src/stdio/gpu/CMakeLists.txt @@ -294,6 +294,17 @@ add_entrypoint_object( .vfprintf_utils ) +add_entrypoint_object( + rename + SRCS + rename.cpp + HDRS + ../rename.h + DEPENDS + libc.hdr.types.FILE + .gpu_file +) + add_entrypoint_object( stdin SRCS diff --git a/libc/src/stdio/gpu/rename.cpp b/libc/src/stdio/gpu/rename.cpp new file mode 100644 index 0000000000000..1087228835842 --- /dev/null +++ b/libc/src/stdio/gpu/rename.cpp @@ -0,0 +1,30 @@ +//===-- GPU Implementation of rename --------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/stdio/rename.h" +#include "src/__support/CPP/string_view.h" +#include "src/__support/macros/config.h" +#include "src/stdio/gpu/file.h" + +#include "hdr/types/FILE.h" + +namespace LIBC_NAMESPACE_DECL { + +LLVM_LIBC_FUNCTION(int, rename, (const char *oldpath, const char *newpath)) { + int ret; + rpc::Client::Port port = rpc::client.open(); + port.send_n(oldpath, internal::string_length(oldpath) + 1); + port.send_n(newpath, internal::string_length(newpath) + 1); + port.recv( + [&](rpc::Buffer *buffer) { ret = static_cast(buffer->data[0]); }); + port.close(); + + return ret; +} + +} // namespace LIBC_NAMESPACE_DECL diff --git a/libc/utils/gpu/server/rpc_server.cpp b/libc/utils/gpu/server/rpc_server.cpp index 8708f946b310e..aa65dfe69c385 100644 --- a/libc/utils/gpu/server/rpc_server.cpp +++ b/libc/utils/gpu/server/rpc_server.cpp @@ -392,6 +392,24 @@ rpc_status_t handle_server_impl( }); break; } + case RPC_RENAME: { + uint64_t oldsizes[lane_size] = {0}; + uint64_t newsizes[lane_size] = {0}; + void *oldpath[lane_size] = {nullptr}; + void *newpath[lane_size] = {nullptr}; + port->recv_n(oldpath, oldsizes, + [&](uint64_t size) { return new char[size]; }); + port->recv_n(newpath, newsizes, + [&](uint64_t size) { return new char[size]; }); + port->send([&](rpc::Buffer *buffer, uint32_t id) { + buffer->data[0] = static_cast( + rename(reinterpret_cast(oldpath[id]), + reinterpret_cast(newpath[id]))); + delete[] reinterpret_cast(oldpath[id]); + delete[] reinterpret_cast(newpath[id]); + }); + break; + } case RPC_SYSTEM: { uint64_t sizes[lane_size] = {0}; void *args[lane_size] = {nullptr}; From 8e68a512ef568324da60c8b2705e9b087d06ebcf Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Tue, 24 Sep 2024 16:09:33 +0100 Subject: [PATCH 018/212] [X86] Add test coverage for #109790 --- llvm/test/CodeGen/X86/pmulh.ll | 72 ++++++++++++++++++++++++++++++++++ 1 file changed, 72 insertions(+) diff --git a/llvm/test/CodeGen/X86/pmulh.ll b/llvm/test/CodeGen/X86/pmulh.ll index c2a009f06b89d..65337b8e1316f 100644 --- a/llvm/test/CodeGen/X86/pmulh.ll +++ b/llvm/test/CodeGen/X86/pmulh.ll @@ -937,6 +937,77 @@ define <16 x i32> @zext_mulhuw_v16i16_lshr(<16 x i16> %a, <16 x i16> %b) { ret <16 x i32> %d } +; PR109790 +define <16 x i16> @zext_mulhuw_v16i16_negative_constant(<16 x i16> %a) { +; SSE-LABEL: zext_mulhuw_v16i16_negative_constant: +; SSE: # %bb.0: +; SSE-NEXT: movdqa {{.*#+}} xmm2 = [32767,32767,32767,32767,32767,32767,32767,32767] +; SSE-NEXT: pand %xmm2, %xmm1 +; SSE-NEXT: pand %xmm2, %xmm0 +; SSE-NEXT: movdqa {{.*#+}} xmm2 = [64536,64536,64536,64536,64536,64536,64536,64536] +; SSE-NEXT: pmulhw %xmm2, %xmm0 +; SSE-NEXT: pmulhw %xmm2, %xmm1 +; SSE-NEXT: retq +; +; AVX-LABEL: zext_mulhuw_v16i16_negative_constant: +; AVX: # %bb.0: +; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 +; AVX-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 # [64536,64536,64536,64536,64536,64536,64536,64536,64536,64536,64536,64536,64536,64536,64536,64536] +; AVX-NEXT: retq + %k = and <16 x i16> %a, + %x = zext nneg <16 x i16> %k to <16 x i32> + %m = mul nsw <16 x i32> %x, + %s = lshr <16 x i32> %m, + %t = trunc nuw <16 x i32> %s to <16 x i16> + ret <16 x i16> %t +} + +; PR109790 +define <16 x i16> @zext_mulhuw_v16i16_positive_constant(<16 x i16> %a) { +; SSE-LABEL: zext_mulhuw_v16i16_positive_constant: +; SSE: # %bb.0: +; SSE-NEXT: movdqa {{.*#+}} xmm2 = 
[32767,32767,32767,32767,32767,32767,32767,32767] +; SSE-NEXT: pand %xmm2, %xmm1 +; SSE-NEXT: pand %xmm2, %xmm0 +; SSE-NEXT: movdqa {{.*#+}} xmm2 = [1000,1000,1000,1000,1000,1000,1000,1000] +; SSE-NEXT: pmulhw %xmm2, %xmm0 +; SSE-NEXT: pmulhw %xmm2, %xmm1 +; SSE-NEXT: retq +; +; AVX2-LABEL: zext_mulhuw_v16i16_positive_constant: +; AVX2: # %bb.0: +; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 +; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero +; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0 +; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero +; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm2 = [1000,0,1000,0,1000,0,1000,0,1000,0,1000,0,1000,0,1000,0] +; AVX2-NEXT: vpmulhuw %ymm2, %ymm0, %ymm0 +; AVX2-NEXT: vpmulhuw %ymm2, %ymm1, %ymm1 +; AVX2-NEXT: vpackusdw %ymm0, %ymm1, %ymm0 +; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3] +; AVX2-NEXT: retq +; +; AVX512F-LABEL: zext_mulhuw_v16i16_positive_constant: +; AVX512F: # %bb.0: +; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 +; AVX512F-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 # [1000,1000,1000,1000,1000,1000,1000,1000,1000,1000,1000,1000,1000,1000,1000,1000] +; AVX512F-NEXT: retq +; +; AVX512BW-LABEL: zext_mulhuw_v16i16_positive_constant: +; AVX512BW: # %bb.0: +; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 +; AVX512BW-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero +; AVX512BW-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0 # [1000,0,1000,0,1000,0,1000,0,1000,0,1000,0,1000,0,1000,0,1000,0,1000,0,1000,0,1000,0,1000,0,1000,0,1000,0,1000,0] +; AVX512BW-NEXT: vpmovdw %zmm0, %ymm0 +; AVX512BW-NEXT: retq + %k = and <16 x i16> %a, + %x = zext nneg <16 x i16> %k to <16 x i32> + %m = mul nuw nsw <16 x i32> %x, + %s = lshr <16 x i32> %m, + %t = trunc nuw nsw <16 x i32> %s to <16 x i16> + ret <16 x i16> %t +} + define <16 x i32> @mulhsw_v16i16_lshr(<16 x i16> %a, <16 x i16> %b) { ; SSE2-LABEL: mulhsw_v16i16_lshr: ; SSE2: # %bb.0: @@ -2056,3 +2127,4 @@ define <8 x i16> @sse2_pmulhu_w_const(<8 x i16> %a0, <8 x i16> %a1) { ret <8 x i16> %res } declare <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16>, <8 x i16>) + From 01be0252c834bc7be222057049d697f488493543 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Tue, 24 Sep 2024 16:49:20 +0100 Subject: [PATCH 019/212] [X86] combinePMULH - simplify pattern matching with SDPatternMatch. NFC. --- llvm/lib/Target/X86/X86ISelLowering.cpp | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index d7a26dc4caec6..cf6617f49e506 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -52983,10 +52983,7 @@ static SDValue combineTruncatedArithmetic(SDNode *N, SelectionDAG &DAG, // combiner. static SDValue combinePMULH(SDValue Src, EVT VT, const SDLoc &DL, SelectionDAG &DAG, const X86Subtarget &Subtarget) { - // First instruction should be a right shift of a multiply. 
- if (Src.getOpcode() != ISD::SRL || - Src.getOperand(0).getOpcode() != ISD::MUL) - return SDValue(); + using namespace llvm::SDPatternMatch; if (!Subtarget.hasSSE2()) return SDValue(); @@ -53001,15 +52998,12 @@ static SDValue combinePMULH(SDValue Src, EVT VT, const SDLoc &DL, if (InVT.getVectorElementType().getSizeInBits() < 32) return SDValue(); - // Need a shift by 16. - APInt ShiftAmt; - if (!ISD::isConstantSplatVector(Src.getOperand(1).getNode(), ShiftAmt) || - ShiftAmt != 16) + // First instruction should be a right shift by 16 of a multiply. + SDValue LHS, RHS; + if (!sd_match(Src, + m_Srl(m_Mul(m_Value(LHS), m_Value(RHS)), m_SpecificInt(16)))) return SDValue(); - SDValue LHS = Src.getOperand(0).getOperand(0); - SDValue RHS = Src.getOperand(0).getOperand(1); - // Count leading sign/zero bits on both inputs - if there are enough then // truncation back to vXi16 will be cheap - either as a pack/shuffle // sequence or using AVX512 truncations. If the inputs are sext/zext then the From 9e60a6ad4242274c39242079d5351fcb9d6d99e6 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Tue, 24 Sep 2024 16:54:58 +0100 Subject: [PATCH 020/212] [X86] combinePMULH - pull out repeated IsTruncateFree helper. NFC. --- llvm/lib/Target/X86/X86ISelLowering.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index cf6617f49e506..65dc107f04b4f 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -53021,12 +53021,12 @@ static SDValue combinePMULH(SDValue Src, EVT VT, const SDLoc &DL, return SDValue(); // Check if both inputs are extensions, which will be removed by truncation. - bool IsTruncateFree = (LHS.getOpcode() == ISD::SIGN_EXTEND || - LHS.getOpcode() == ISD::ZERO_EXTEND) && - (RHS.getOpcode() == ISD::SIGN_EXTEND || - RHS.getOpcode() == ISD::ZERO_EXTEND) && - LHS.getOperand(0).getScalarValueSizeInBits() <= 16 && - RHS.getOperand(0).getScalarValueSizeInBits() <= 16; + auto isOpTruncateFree = [](SDValue Op) { + return (Op.getOpcode() == ISD::SIGN_EXTEND || + Op.getOpcode() == ISD::ZERO_EXTEND) && + Op.getOperand(0).getScalarValueSizeInBits() <= 16; + }; + bool IsTruncateFree = isOpTruncateFree(LHS) && isOpTruncateFree(RHS); // For AVX2+ targets, with the upper bits known zero, we can perform MULHU on // the (bitcasted) inputs directly, and then cheaply pack/truncate the result From 406a2128f57b1e86748cd6de6d3eab6b1e2db2ad Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Tue, 24 Sep 2024 17:37:26 +0100 Subject: [PATCH 021/212] [X86] combinePMULH - we can treat constant vectors as freely truncatable. Fixes #109790 --- llvm/lib/Target/X86/X86ISelLowering.cpp | 7 +++--- llvm/test/CodeGen/X86/pmulh.ll | 31 ++++--------------------- 2 files changed, 9 insertions(+), 29 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 65dc107f04b4f..d9eedfdfd53a4 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -53022,9 +53022,10 @@ static SDValue combinePMULH(SDValue Src, EVT VT, const SDLoc &DL, // Check if both inputs are extensions, which will be removed by truncation. 
auto isOpTruncateFree = [](SDValue Op) {
-    return (Op.getOpcode() == ISD::SIGN_EXTEND ||
-            Op.getOpcode() == ISD::ZERO_EXTEND) &&
-           Op.getOperand(0).getScalarValueSizeInBits() <= 16;
+    if (Op.getOpcode() == ISD::SIGN_EXTEND ||
+        Op.getOpcode() == ISD::ZERO_EXTEND)
+      return Op.getOperand(0).getScalarValueSizeInBits() <= 16;
+    return ISD::isBuildVectorOfConstantSDNodes(Op.getNode());
   };
   bool IsTruncateFree = isOpTruncateFree(LHS) && isOpTruncateFree(RHS);
 
diff --git a/llvm/test/CodeGen/X86/pmulh.ll b/llvm/test/CodeGen/X86/pmulh.ll
index 65337b8e1316f..502249a87c489 100644
--- a/llvm/test/CodeGen/X86/pmulh.ll
+++ b/llvm/test/CodeGen/X86/pmulh.ll
@@ -974,32 +974,11 @@ define <16 x i16> @zext_mulhuw_v16i16_positive_constant(<16 x i16> %a) {
 ; SSE-NEXT: pmulhw %xmm2, %xmm1
 ; SSE-NEXT: retq
 ;
-; AVX2-LABEL: zext_mulhuw_v16i16_positive_constant:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
-; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
-; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
-; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
-; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm2 = [1000,0,1000,0,1000,0,1000,0,1000,0,1000,0,1000,0,1000,0]
-; AVX2-NEXT: vpmulhuw %ymm2, %ymm0, %ymm0
-; AVX2-NEXT: vpmulhuw %ymm2, %ymm1, %ymm1
-; AVX2-NEXT: vpackusdw %ymm0, %ymm1, %ymm0
-; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
-; AVX2-NEXT: retq
-;
-; AVX512F-LABEL: zext_mulhuw_v16i16_positive_constant:
-; AVX512F: # %bb.0:
-; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
-; AVX512F-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 # [1000,1000,1000,1000,1000,1000,1000,1000,1000,1000,1000,1000,1000,1000,1000,1000]
-; AVX512F-NEXT: retq
-;
-; AVX512BW-LABEL: zext_mulhuw_v16i16_positive_constant:
-; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
-; AVX512BW-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
-; AVX512BW-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0 # [1000,0,1000,0,1000,0,1000,0,1000,0,1000,0,1000,0,1000,0,1000,0,1000,0,1000,0,1000,0,1000,0,1000,0,1000,0,1000,0]
-; AVX512BW-NEXT: vpmovdw %zmm0, %ymm0
-; AVX512BW-NEXT: retq
+; AVX-LABEL: zext_mulhuw_v16i16_positive_constant:
+; AVX: # %bb.0:
+; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
+; AVX-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 # [1000,1000,1000,1000,1000,1000,1000,1000,1000,1000,1000,1000,1000,1000,1000,1000]
+; AVX-NEXT: retq
 %k = and <16 x i16> %a,
 %x = zext nneg <16 x i16> %k to <16 x i32>
 %m = mul nuw nsw <16 x i32> %x,
 %s = lshr <16 x i32> %m,
 %t = trunc nuw nsw <16 x i32> %s to <16 x i16>
 ret <16 x i16> %t
From e64673d3174af941eb3c9f1ad822792154aa1d31 Mon Sep 17 00:00:00 2001
From: Craig Topper
Date: Tue, 24 Sep 2024 09:49:32 -0700
Subject: [PATCH 022/212] [RISCV] Treat insert_subvector into undef with
 index==0 as legal. (#109745)

This holds regardless of whether the vector types are fixed or
scalable: we can always use subregister ops, so no container conversion
is needed.
--- llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index b998a1eb11c30..bf822eb2c6eeb 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -10152,13 +10152,15 @@ SDValue RISCVTargetLowering::lowerINSERT_SUBVECTOR(SDValue Op, unsigned OrigIdx = Op.getConstantOperandVal(2); const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo(); + if (OrigIdx == 0 && Vec.isUndef()) + return Op; + // We don't have the ability to slide mask vectors up indexed by their i1 // elements; the smallest we can do is i8. Often we are able to bitcast to // equivalent i8 vectors. Note that when inserting a fixed-length vector // into a scalable one, we might not necessarily have enough scalable // elements to safely divide by 8: nxv1i1 = insert nxv1i1, v4i1 is valid. - if (SubVecVT.getVectorElementType() == MVT::i1 && - (OrigIdx != 0 || !Vec.isUndef())) { + if (SubVecVT.getVectorElementType() == MVT::i1) { if (VecVT.getVectorMinNumElements() >= 8 && SubVecVT.getVectorMinNumElements() >= 8) { assert(OrigIdx % 8 == 0 && "Invalid index"); @@ -10196,8 +10198,6 @@ SDValue RISCVTargetLowering::lowerINSERT_SUBVECTOR(SDValue Op, // vector group up the full amount. const auto VLen = Subtarget.getRealVLen(); if (SubVecVT.isFixedLengthVector() && !VLen) { - if (OrigIdx == 0 && Vec.isUndef() && !VecVT.isFixedLengthVector()) - return Op; MVT ContainerVT = VecVT; if (VecVT.isFixedLengthVector()) { ContainerVT = getContainerForFixedLengthVector(VecVT); @@ -10208,11 +10208,6 @@ SDValue RISCVTargetLowering::lowerINSERT_SUBVECTOR(SDValue Op, DAG.getUNDEF(ContainerVT), SubVec, DAG.getVectorIdxConstant(0, DL)); - if (OrigIdx == 0 && Vec.isUndef() && VecVT.isFixedLengthVector()) { - SubVec = convertFromScalableVector(VecVT, SubVec, DAG, Subtarget); - return DAG.getBitcast(Op.getValueType(), SubVec); - } - SDValue Mask = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).first; // Set the vector length to only the number of elements we care about. 
Note From c6bf59f26b2d74474a66182db6ebd576273bfb00 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Tue, 24 Sep 2024 17:45:10 +0100 Subject: [PATCH 023/212] [X86] Add test coverage for #109272 --- .../X86/vector-shuffle-combining-avx512vbmi.ll | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512vbmi.ll b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512vbmi.ll index 9b32005927ace..61814b48e6b3a 100644 --- a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512vbmi.ll +++ b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512vbmi.ll @@ -146,3 +146,18 @@ define <64 x i8> @combine_permi2q_pshufb_as_permi2d_mask(<8 x i64> %a0, <8 x i64 %res2 = call <64 x i8> @llvm.x86.avx512.mask.pshuf.b.512(<64 x i8> %res1, <64 x i8> , <64 x i8> zeroinitializer, i64 %m) ret <64 x i8> %res2 } + +; PR109272 +define <64 x i8> @combine_vpermi2var_v64i8_with_mask(<64 x i8> %a0, <64 x i8> %a1, <64 x i8> %a2) { +; CHECK-LABEL: combine_vpermi2var_v64i8_with_mask: +; CHECK: # %bb.0: +; CHECK-NEXT: vpermt2b %zmm2, %zmm1, %zmm0 +; CHECK-NEXT: vpmovb2m %zmm1, %k0 +; CHECK-NEXT: vpmovm2b %k0, %zmm1 +; CHECK-NEXT: vpandnq %zmm0, %zmm1, %zmm0 +; CHECK-NEXT: ret{{[l|q]}} + %perm = tail call <64 x i8> @llvm.x86.avx512.vpermi2var.qi.512(<64 x i8> %a0, <64 x i8> %a1, <64 x i8> %a2) + %cmp = icmp slt <64 x i8> %a1, zeroinitializer + %sel = select <64 x i1> %cmp, <64 x i8> zeroinitializer, <64 x i8> %perm + ret <64 x i8> %sel +} From 04b443e77845cd20ab5acc4356cee509316135dd Mon Sep 17 00:00:00 2001 From: jimingham Date: Tue, 24 Sep 2024 10:00:00 -0700 Subject: [PATCH 024/212] Add the ability to define custom completers to the parsed_cmd template. (#109062) If your arguments or option values are of a type that naturally uses one of our common completion mechanisms, you will get completion for free. But if you have your own custom values or if you want to do fancy things like have `break set -s foo.dylib -n ba` only complete on symbols in foo.dylib, you can use this new mechanism to achieve that. 
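
For illustration, a parsed command using the new hooks might look
roughly like the sketch below. The command, option, and property names
are invented for the example, and the registration and help boilerplate
is omitted; the hook signatures and return conventions are the ones
documented in python-reference.rst in this patch:

    import lldb

    class ToySymbolCommand(lldb.ParsedCommand):
        def setup_command_definition(self):
            parser = self.get_parser()
            parser.add_option(
                "s",
                "shlib",
                help="Only operate on symbols from this shared library.",
                value_type=lldb.eArgTypeShlibName,
                dest="shlib_name",
                default=None,
            )
            parser.make_argument_element(lldb.eArgTypeSymbol, repeat="plus")

        def handle_option_argument_completion(self, long_option, cursor_pos):
            # Complete the --shlib value by hand; returning False for the
            # other options falls back to lldb's ordinary completion.
            if long_option != "shlib":
                return False
            # ... compute the matches here; returning True means "handled,
            # but nothing to suggest".
            return True

        def handle_argument_completion(self, args, arg_pos, cursor_pos):
            # Returning None asks lldb to do its default argument completion.
            return None

        def __call__(self, debugger, args_array, exe_ctx, result):
            result.AppendMessage(
                "--shlib was %s" % self.get_parser().shlib_name)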
--- lldb/bindings/python/python-wrapper.swig | 73 +++++++ lldb/docs/use/python-reference.rst | 185 ++++++++++++++++- lldb/examples/python/cmdtemplate.py | 15 +- lldb/examples/python/templates/parsed_cmd.py | 97 ++++++++- .../lldb/Interpreter/ScriptInterpreter.h | 14 ++ lldb/include/lldb/Utility/CompletionRequest.h | 2 + .../source/Commands/CommandObjectCommands.cpp | 191 ++++++++++++++++++ lldb/source/Interpreter/Options.cpp | 5 +- .../Python/SWIGPythonBridge.h | 9 + .../Python/ScriptInterpreterPython.cpp | 40 ++++ .../Python/ScriptInterpreterPythonImpl.h | 8 + .../script/add/TestAddParsedCommand.py | 132 ++++++++---- .../command/script/add/test_commands.py | 69 +++++-- .../Python/PythonTestSuite.cpp | 13 ++ 14 files changed, 771 insertions(+), 82 deletions(-) diff --git a/lldb/bindings/python/python-wrapper.swig b/lldb/bindings/python/python-wrapper.swig index 961fb2d1a7617..b72a462d04643 100644 --- a/lldb/bindings/python/python-wrapper.swig +++ b/lldb/bindings/python/python-wrapper.swig @@ -667,6 +667,79 @@ lldb_private::python::SWIGBridge::LLDBSwigPythonGetRepeatCommandForScriptedComma return result.Str().GetString().str(); } +StructuredData::DictionarySP +lldb_private::python::SWIGBridge::LLDBSwigPythonHandleArgumentCompletionForScriptedCommand(PyObject *implementor, + std::vector &args_vec, size_t args_pos, size_t pos_in_arg) { + + PyErr_Cleaner py_err_cleaner(true); + + PythonObject self(PyRefType::Borrowed, implementor); + auto pfunc = self.ResolveName("handle_argument_completion"); + // If this isn't implemented, return an empty dict to signal falling back to default completion: + if (!pfunc.IsAllocated()) + return {}; + + PythonList args_list(PyInitialValue::Empty); + for (auto elem : args_vec) + args_list.AppendItem(PythonString(elem)); + + PythonObject result = pfunc(args_list, PythonInteger(args_pos), PythonInteger(pos_in_arg)); + // Returning None means do the ordinary completion + if (result.IsNone()) + return {}; + + // Convert the return dictionary to a DictionarySP. + StructuredData::ObjectSP result_obj_sp = result.CreateStructuredObject(); + if (!result_obj_sp) + return {}; + + StructuredData::DictionarySP dict_sp(new StructuredData::Dictionary(result_obj_sp)); + if (dict_sp->GetType() == lldb::eStructuredDataTypeInvalid) + return {}; + return dict_sp; +} + +StructuredData::DictionarySP +lldb_private::python::SWIGBridge::LLDBSwigPythonHandleOptionArgumentCompletionForScriptedCommand(PyObject *implementor, + llvm::StringRef &long_option, size_t pos_in_arg) { + + PyErr_Cleaner py_err_cleaner(true); + + PythonObject self(PyRefType::Borrowed, implementor); + auto pfunc = self.ResolveName("handle_option_argument_completion"); + // If this isn't implemented, return an empty dict to signal falling back to default completion: + if (!pfunc.IsAllocated()) + return {}; + + PythonObject result = pfunc(PythonString(long_option), PythonInteger(pos_in_arg)); + // Returning None means do the ordinary completion + if (result.IsNone()) + return {}; + + // Returning a boolean: + // True means the completion was handled, but there were no completions + // False means that the completion was not handled, again, do the ordinary completion: + if (result.GetObjectType() == PyObjectType::Boolean) { + if (!result.IsTrue()) + return {}; + // Make up a completion dictionary with the right element: + StructuredData::DictionarySP dict_sp(new StructuredData::Dictionary()); + dict_sp->AddBooleanItem("no-completion", true); + return dict_sp; + } + + + // Convert the return dictionary to a DictionarySP. 
+  StructuredData::ObjectSP result_obj_sp = result.CreateStructuredObject();
+  if (!result_obj_sp)
+    return {};
+
+  StructuredData::DictionarySP dict_sp(new StructuredData::Dictionary(result_obj_sp));
+  if (dict_sp->GetType() == lldb::eStructuredDataTypeInvalid)
+    return {};
+  return dict_sp;
+}
+
 #include "lldb/Interpreter/CommandReturnObject.h"
 
 bool lldb_private::python::SWIGBridge::LLDBSwigPythonCallParsedCommandObject(
diff --git a/lldb/docs/use/python-reference.rst b/lldb/docs/use/python-reference.rst
index b12048f1af067..95a6020ca3e45 100644
--- a/lldb/docs/use/python-reference.rst
+++ b/lldb/docs/use/python-reference.rst
@@ -551,7 +551,7 @@ command definition form can't do the right thing. Since lldb 3.7, Python
 commands can also be implemented by means of a class which should implement
 the following interface:
 
-::
+.. code-block:: python
 
   class CommandObjectType:
     def __init__(self, debugger, internal_dict):
@@ -586,20 +586,193 @@ which should implement the following interface:
 As a convenience, you can treat the result object as a Python file object, and
 say
 
-::
+.. code-block:: python
 
   print >>result, "my command does lots of cool stuff"
 
 SBCommandReturnObject and SBStream both support this file-like behavior by
 providing write() and flush() calls at the Python layer.
 
+The commands that are added using this class definition are what lldb calls
+"raw" commands. The command interpreter doesn't attempt to parse the command
+line: it doesn't handle option values, generate help for them, or complete
+them. Raw commands are useful when the arguments passed to the command are
+unstructured, and having to protect them against lldb command parsing would
+be onerous. For instance, "expr" is a raw command.
+
+You can also add scripted commands that implement the "parsed command", where
+the options and their types are specified, as well as the argument and argument
+types. These commands look and act like the majority of lldb commands, and you
+can also add custom completions for the options and/or the arguments if you have
+special needs.
+
+The easiest way to do this is to derive your new command from the lldb.ParsedCommand
+class. That class responds in the same way to the help & repeat command interfaces,
+provides some convenience methods, and most importantly an LLDBOptionValueParser,
+accessed through lldb.ParsedCommand.get_parser(). The parser is used to set
+your command definitions, and to retrieve option values in the __call__ method.
+
+To set up the command definition, implement the ParsedCommand abstract method:
+
+.. code-block:: python
+
+    def setup_command_definition(self):
+
+This is called when your command is added to lldb. In this method you add the
+options and their types, the option help strings, etc. to the command using the API:
+
+.. code-block:: python
+
+    def add_option(self, short_option, long_option, help, default,
+                   dest = None, required=False, groups = None,
+                   value_type=lldb.eArgTypeNone, completion_type=None,
+                   enum_values=None):
+        """
+        short_option: one character, must be unique, not required
+        long_option:  no spaces, must be unique, required
+        help:         a usage string for this option, will print in the command help
+        default:      the initial value for this option (if it has a value)
+        dest:         the name of the property that gives you access to the value for
+                      this value.  Defaults to the long option if not provided.
+        required: if true, this option must be provided or the command will error out
+        groups: Which "option groups" does this option belong to.  This can either be
+                a simple list (e.g. [1, 3, 4, 5]) or you can specify ranges by sublists:
+                so [1, [3,5]] is the same as [1, 3, 4, 5].
+        value_type: one of the lldb.eArgType enum values.  Some of the common arg
+                    types also have default completers, which will be applied automatically.
+        completion_type: currently these are values from the lldb.CompletionType enum.  If
+                         you need custom completions, implement handle_option_argument_completion.
+        enum_values: An array of duples: ["element_name", "element_help"].  If provided,
+                     only one of the enum elements is allowed.  The value will be the
+                     element_name for the chosen enum element as a string.
+        """
+
+Similarly, you can add argument types to the command:
+
+.. code-block:: python
+
+    def make_argument_element(self, arg_type, repeat = "optional", groups = None):
+        """
+        arg_type: The argument type, one of the lldb.eArgType enum values.
+        repeat: Choose from the following options:
+                "plain" - one value
+                "optional" - zero or more values
+                "plus" - one or more values
+        groups: As with add_option.
+        """
+
+Then implement the body of the command by defining:
+
+.. code-block:: python
+
+    def __call__(self, debugger, args_array, exe_ctx, result):
+        """This is the command callback.  The option values are
+        provided by the 'dest' properties on the parser.
+
+        args_array: This is the list of arguments provided.
+        exe_ctx: Gives the SBExecutionContext on which the
+                 command should operate.
+        result: Any results of the command should be
+                written into this SBCommandReturnObject.
+        """
+
+This differs from the "raw" command's __call__ in that the arguments are already
+parsed into the args_array, and the option values are set in the parser, and
+can be accessed using their property name.  The LLDBOptionValueParser class has
+a couple of other handy methods:
+
+.. code-block:: python
+
+    def was_set(self, long_option_name):
+
+returns True if the option was specified on the command line.
+
+.. code-block:: python
+
+    def dest_for_option(self, long_option_name):
+        """
+        This will return the value of the dest variable you defined for opt_name.
+        Mostly useful for handle_completion where you get passed the long option.
+        """
+
+lldb will handle completing your option names, and all your enum values
+automatically.  If your option or argument types have associated built-in completers,
+then lldb will also handle that completion for you.  But if you have a need for
+custom completions, either in your arguments or option values, you can handle
+completion by hand as well.  To handle completion of option value arguments,
+your lldb.ParsedCommand subclass should implement:
+
+.. code-block:: python
+
+    def handle_option_argument_completion(self, long_option, cursor_pos):
+        """
+        long_option: The long option name of the option whose value you are
+                     asked to complete.
+        cursor_pos: The cursor position in the value for that option - which
+                    you can get from the option parser.
+        """
+
+And to handle the completion of arguments:
+
+.. code-block:: python
+
+    def handle_argument_completion(self, args, arg_pos, cursor_pos):
+        """
+        args: A list of the arguments to the command
+        arg_pos: An index into the args list of the argument with the cursor
+        cursor_pos: The cursor position in the arg specified by arg_pos
+        """
+
+When either of these APIs is called, the command line will have been parsed up to
+the word containing the cursor, and any option values set in that part of the command
+string are available from the option value parser.  That's useful for instance
+if you have a --shared-library option that would constrain the completions for,
+say, a symbol name option or argument.
+
+The return value specifies what the completion options are.  You have four
+choices:
+
+- `True`: the completion was handled with no completions.
+
+- `False`: the completion was not handled, forward it to the regular
+completion machinery.
+
+- A dictionary with the key "completion": there is a single candidate,
+whose value is the value of the "completion" key.  Optionally you can pass a
+"mode" key whose value is either "partial" or "complete".  Return "partial" if
+the "completion" string is a common prefix of all the completed values.
+
+For instance, if the string you are completing is "Test" and the available completions are:
+"Test1", "Test11" and "Test111", you should return the dictionary:
+
+.. code-block:: python
+
+   return {"completion": "Test1", "mode" : "partial"}
+
+and then lldb will add the "1" at the cursor and advance it after the added string,
+waiting for more completions.  But if "Test1" is the only completion, return:
+
+.. code-block:: python
+
+   {"completion": "Test1", "mode": "complete"}
+
+and lldb will add "1 " at the cursor, indicating the command string is complete.
+
+The default is "complete", you don't need to specify a "mode" in that case.
+
+- A dictionary with the key "values" whose value is a list of candidate completion
+strings.  The command interpreter will present those strings as the available choices.
+You can optionally include a "descriptions" key, whose value is a parallel array
+of description strings, and the completion will show the description next to
+each completion.
+
+
 One other handy convenience when defining lldb command-line commands is the
-command command script import which will import a module specified by file
+command "command script import" which will import a module specified by file
 path, so you don't have to change your PYTHONPATH for temporary scripts. It
 also has another convenience that if your new script module has a function
 of the form:
 
-::
+.. code-block:: python
 
   def __lldb_init_module(debugger, internal_dict):
     # Command Initialization code goes here
@@ -615,7 +788,7 @@ creating scripts that can be run from the command line. However, for command
 line scripts, the debugger instance must be created manually. Sample code would
 look like:
 
-::
+.. code-block:: python
 
   if __name__ == '__main__':
     # Initialize the debugger before making any API calls.
@@ -638,7 +811,7 @@ look like:
 Now we can create a module called ls.py in the file ~/ls.py that will implement
 a function that can be used by LLDB's python command code:
 
-::
+.. code-block:: python
 
   #!/usr/bin/env python
 
diff --git a/lldb/examples/python/cmdtemplate.py b/lldb/examples/python/cmdtemplate.py
index b6a21cba7113e..a9fbe0b40e195 100644
--- a/lldb/examples/python/cmdtemplate.py
+++ b/lldb/examples/python/cmdtemplate.py
@@ -29,8 +29,8 @@ def get_flags(self):
         return lldb.eCommandRequiresFrame | lldb.eCommandProcessMustBePaused
 
     def setup_command_definition(self):
-
-        self.ov_parser.add_option(
+        ov_parser = self.get_parser()
+        ov_parser.add_option(
             "i",
             "in-scope",
             help = "in_scope_only = True",
@@ -39,7 +39,7 @@ def setup_command_definition(self):
             default = True,
         )
 
-        self.ov_parser.add_option(
+        ov_parser.add_option(
             "i",
             "in-scope",
             help = "in_scope_only = True",
@@ -48,7 +48,7 @@ def setup_command_definition(self):
             default=True,
         )
 
-        self.ov_parser.add_option(
+        ov_parser.add_option(
             "a",
             "arguments",
             help = "arguments = True",
@@ -57,7 +57,7 @@ def setup_command_definition(self):
             default = True,
         )
 
-        self.ov_parser.add_option(
+        ov_parser.add_option(
             "l",
             "locals",
             help = "locals = True",
@@ -66,7 +66,7 @@ def setup_command_definition(self):
             default = True,
         )
 
-        self.ov_parser.add_option(
+        ov_parser.add_option(
             "s",
             "statics",
             help = "statics = True",
@@ -103,8 +103,9 @@ def __call__(self, debugger, command, exe_ctx, result):
             result.SetError("invalid frame")
             return
 
+        ov_parser = self.get_parser()
         variables_list = frame.GetVariables(
-            self.ov_parser.arguments, self.ov_parser.locals, self.ov_parser.statics, self.ov_parser.inscope
+            ov_parser.arguments, ov_parser.locals, ov_parser.statics, ov_parser.inscope
         )
         variables_count = variables_list.GetSize()
         if variables_count == 0:
diff --git a/lldb/examples/python/templates/parsed_cmd.py b/lldb/examples/python/templates/parsed_cmd.py
index 06124adf43420..13d6eae405c08 100644
--- a/lldb/examples/python/templates/parsed_cmd.py
+++ b/lldb/examples/python/templates/parsed_cmd.py
@@ -4,7 +4,8 @@
 The way to use it is to make a class for your command that inherits from ParsedCommandBase.
 That will make an LLDBOptionValueParser which you will use for your option definition, and
 to fetch option values for the current invocation
-of your command.  Access to the OV parser is through:
+of your command.  For concision, I'll call this the `OVParser`.
+Access to the `OVParser` is through:
 
 ParsedCommandBase.get_parser()
 
@@ -43,7 +44,65 @@ def __call__(self, debugger, args_list, exe_ctx, result):
 will return True if the user set this option, and False if it was left at its default
 value.
 
-There are example commands in the lldb testsuite at:
+Custom Completions:
+
+You can also implement custom completers for your custom command, either for the
+arguments to your command or to the option values in your command.  If you use enum
+values or if your option/argument is one of the types we have completers for,
+you should not need to do this.  But if you have your own completable types, or if
+you want completion of one option to be conditioned by other options on the command
+line, you can use this interface to take over the completion.
+
+You can choose to add a completion for the option values defined for your command,
+or for the arguments, separately.  For the option values, define:
+
+def handle_option_argument_completion(self, long_option, cursor_pos):
+
+The line to be completed will be parsed up to the option containing the cursor position,
+and the values will be set in the OptionValue parser object.  long_option will be
+the option name containing the cursor, and cursor_pos will be the position of the cursor
+in that option's value.  You can call the `OVParser` method: `dest_for_option(long_option)`
+to get the value for that option.  The other options that came before the cursor in the command
+line will also be set in the `OVParser` when the completion handler is called.
+
+For argument values, define:
+
+def handle_argument_completion(self, args, arg_pos, cursor_pos):
+
+Again, the command line will be parsed up to the cursor position, and all the options
+before the cursor position will be set in the `OVParser`.  args is a Python list of the
+arguments, arg_pos is the index of the argument with the cursor, and cursor_pos is
+the position of the cursor in the argument.
+
+In both cases, the return value determines the completion.
+
+Return False to mean "Not Handled" - in which case lldb will fall back on the
+standard completion machinery.
+
+Return True to mean "Handled with no completions".
+
+If there is a single unique completion, return a Python dictionary with two elements:
+
+return {"completion" : "completed_value", "mode" : <"partial", "complete">}
+
+If the mode is "partial", then the completion is to a common base, if it is "complete"
+then the argument is considered done - mostly meaning lldb will put a space after the
+completion string.  "complete" is the default if no "mode" is specified.
+
+If there are multiple completion options, then return:
+
+return {"values" : ["option1", "option2"]}
+
+Optionally, you can return a parallel array of "descriptions" which the completer will
+print alongside the options:
+
+return {"values" : ["option1", "option2"], "descriptions" : ["the first option", "the second option"]}
+
+The cmdtemplate example currently uses the parsed command infrastructure:
+
+llvm-project/lldb/examples/python/cmdtemplate.py
+
+There are also a few example commands in the lldb testsuite at:
 llvm-project/lldb/test/API/commands/command/script/add/test_commands.py
 """
 
@@ -226,10 +285,14 @@ def set_option_value(self, exe_ctx, opt_name, opt_value):
         return True
 
     def was_set(self, opt_name):
-        """ Call this in the __call__ method of your command to determine
-        whether this option was set on the command line.  It is sometimes
-        useful to know whether an option has the default value because the
-        user set it explicitly (was_set -> True) or not. """
+        """Call this in the __call__ method of your command to determine
+        whether this option was set on the command line.  It is sometimes
+        useful to know whether an option has the default value because the
+        user set it explicitly (was_set -> True) or not.
+        You can also call this in a handle_completion method, but it will
+        currently only report true values for the options mentioned
+        BEFORE the cursor point in the command line.
+        """
 
         elem = self.get_option_element(opt_name)
         if not elem:
@@ -239,6 +302,16 @@ def was_set(self, opt_name):
         except AttributeError:
             return False
 
+    def dest_for_option(self, opt_name):
+        """This will return the value of the dest variable you defined for opt_name.
+        Mostly useful for handle_completion where you get passed the long option.
+        """
+        elem = self.get_option_element(opt_name)
+        if not elem:
+            return None
+        value = self.__dict__[elem["dest"]]
+        return value
+
     def add_option(self, short_option, long_option, help, default,
                    dest = None, required=False, groups = None,
                    value_type=lldb.eArgTypeNone, completion_type=None,
@@ -251,14 +324,16 @@ def add_option(self, short_option, long_option, help, default,
             dest: the name of the property that gives you access to the value for
                  this value.  Defaults to the long option if not provided.
             required: if true, this option must be provided or the command will error out
-            groups: Which "option groups" does this option belong to
+            groups: Which "option groups" does this option belong to.  This can either be
+                    a simple list (e.g. [1, 3, 4, 5]) or you can specify ranges by sublists:
+                    so [1, [3,5]] is the same as [1, 3, 4, 5].
             value_type: one of the lldb.eArgType enum values.  Some of the common arg
                         types also have default completers, which will be applied automatically.
-            completion_type: currently these are values form the lldb.CompletionType enum, I
-                             haven't done custom completions yet.
+            completion_type: currently these are values from the lldb.CompletionType enum.  If
+                             you need custom completions, implement handle_option_argument_completion.
             enum_values: An array of duples: ["element_name", "element_help"].  If provided,
-                        only one of the enum elements is allowed.  The value will be the
-                        element_name for the chosen enum element as a string.
+                         only one of the enum elements is allowed.  The value will be the
+                         element_name for the chosen enum element as a string.
             """
         if not dest:
             dest = long_option
diff --git a/lldb/include/lldb/Interpreter/ScriptInterpreter.h b/lldb/include/lldb/Interpreter/ScriptInterpreter.h
index 901ecf3012d51..2c2bd6f232e09 100644
--- a/lldb/include/lldb/Interpreter/ScriptInterpreter.h
+++ b/lldb/include/lldb/Interpreter/ScriptInterpreter.h
@@ -420,6 +420,20 @@ class ScriptInterpreter : public PluginInterface {
     return std::nullopt;
   }
 
+  virtual StructuredData::DictionarySP
+  HandleArgumentCompletionForScriptedCommand(
+      StructuredData::GenericSP impl_obj_sp, std::vector<llvm::StringRef> &args,
+      size_t args_pos, size_t char_in_arg) {
+    return {};
+  }
+
+  virtual StructuredData::DictionarySP
+  HandleOptionArgumentCompletionForScriptedCommand(
+      StructuredData::GenericSP impl_obj_sp, llvm::StringRef &long_name,
+      size_t char_in_arg) {
+    return {};
+  }
+
   virtual bool RunScriptFormatKeyword(const char *impl_function,
                                       Process *process, std::string &output,
                                       Status &error) {
diff --git a/lldb/include/lldb/Utility/CompletionRequest.h b/lldb/include/lldb/Utility/CompletionRequest.h
index 1a2b1d639950f..650158a197dbd 100644
--- a/lldb/include/lldb/Utility/CompletionRequest.h
+++ b/lldb/include/lldb/Utility/CompletionRequest.h
@@ -139,6 +139,8 @@ class CompletionRequest {
     return GetParsedLine()[GetCursorIndex()];
   }
 
+  size_t GetCursorCharPos() const { return m_cursor_char_position; }
+
   /// Drops the first argument from the argument list.
void ShiftArguments() { m_cursor_index--; diff --git a/lldb/source/Commands/CommandObjectCommands.cpp b/lldb/source/Commands/CommandObjectCommands.cpp index e3291640fa935..845b89a75b7b3 100644 --- a/lldb/source/Commands/CommandObjectCommands.cpp +++ b/lldb/source/Commands/CommandObjectCommands.cpp @@ -1637,6 +1637,129 @@ class CommandObjectScriptingObjectParsed : public CommandObjectParsed { size_t GetNumOptions() { return m_num_options; } + void PrepareOptionsForCompletion(CompletionRequest &request, + OptionElementVector &option_vec, + ExecutionContext *exe_ctx) { + // I'm not sure if we'll get into trouble doing an option parsing start + // and end in this context. If so, then I'll have to directly tell the + // scripter to do this. + OptionParsingStarting(exe_ctx); + auto opt_defs = GetDefinitions(); + + // Iterate through the options we found so far, and push them into + // the scripted side. + for (auto option_elem : option_vec) { + int cur_defs_index = option_elem.opt_defs_index; + // If we don't recognize this option we can't set it. + if (cur_defs_index == OptionArgElement::eUnrecognizedArg || + cur_defs_index == OptionArgElement::eBareDash || + cur_defs_index == OptionArgElement::eBareDoubleDash) + continue; + bool option_has_arg = opt_defs[cur_defs_index].option_has_arg; + llvm::StringRef cur_arg_value; + if (option_has_arg) { + int cur_arg_pos = option_elem.opt_arg_pos; + if (cur_arg_pos != OptionArgElement::eUnrecognizedArg && + cur_arg_pos != OptionArgElement::eBareDash && + cur_arg_pos != OptionArgElement::eBareDoubleDash) { + cur_arg_value = + request.GetParsedLine().GetArgumentAtIndex(cur_arg_pos); + } + } + SetOptionValue(cur_defs_index, cur_arg_value, exe_ctx); + } + OptionParsingFinished(exe_ctx); + } + + void + ProcessCompletionDict(CompletionRequest &request, + StructuredData::DictionarySP &completion_dict_sp) { + // We don't know how to process an empty completion dict, our callers have + // to do that. + assert(completion_dict_sp && "Must have valid completion dict"); + // First handle the case of a single completion: + llvm::StringRef completion; + // If the dictionary has one element "no-completion" then we return here + if (completion_dict_sp->GetValueForKeyAsString("no-completion", + completion)) + return; + + if (completion_dict_sp->GetValueForKeyAsString("completion", + completion)) { + llvm::StringRef mode_str; + CompletionMode mode = CompletionMode::Normal; + if (completion_dict_sp->GetValueForKeyAsString("mode", mode_str)) { + if (mode_str == "complete") + mode = CompletionMode::Normal; + else if (mode_str == "partial") + mode = CompletionMode::Partial; + else { + // FIXME - how do I report errors here? + return; + } + } + request.AddCompletion(completion, "", mode); + return; + } + // The completions are required, the descriptions are not: + StructuredData::Array *completions; + StructuredData::Array *descriptions; + if (completion_dict_sp->GetValueForKeyAsArray("values", completions)) { + completion_dict_sp->GetValueForKeyAsArray("descriptions", descriptions); + size_t num_completions = completions->GetSize(); + for (size_t idx = 0; idx < num_completions; idx++) { + auto val = completions->GetItemAtIndexAsString(idx); + if (!val) + // FIXME: How do I report this error? + return; + + if (descriptions) { + auto desc = descriptions->GetItemAtIndexAsString(idx); + request.AddCompletion(*val, desc ? 
*desc : ""); + } else + request.AddCompletion(*val); + } + } + } + + void + HandleOptionArgumentCompletion(lldb_private::CompletionRequest &request, + OptionElementVector &option_vec, + int opt_element_index, + CommandInterpreter &interpreter) override { + ScriptInterpreter *scripter = + interpreter.GetDebugger().GetScriptInterpreter(); + + if (!scripter) + return; + + ExecutionContext exe_ctx = interpreter.GetExecutionContext(); + PrepareOptionsForCompletion(request, option_vec, &exe_ctx); + + auto defs = GetDefinitions(); + + size_t defs_index = option_vec[opt_element_index].opt_defs_index; + llvm::StringRef option_name = defs[defs_index].long_option; + bool is_enum = defs[defs_index].enum_values.size() != 0; + if (option_name.empty()) + return; + // If this is an enum, we don't call the custom completer, just let the + // regular option completer handle that: + StructuredData::DictionarySP completion_dict_sp; + if (!is_enum) + completion_dict_sp = + scripter->HandleOptionArgumentCompletionForScriptedCommand( + m_cmd_obj_sp, option_name, request.GetCursorCharPos()); + + if (!completion_dict_sp) { + Options::HandleOptionArgumentCompletion(request, option_vec, + opt_element_index, interpreter); + return; + } + + ProcessCompletionDict(request, completion_dict_sp); + } + private: struct EnumValueStorage { EnumValueStorage() { @@ -1878,6 +2001,74 @@ class CommandObjectScriptingObjectParsed : public CommandObjectParsed { Status GetArgsError() { return m_args_error.Clone(); } bool WantsCompletion() override { return true; } +private: + void PrepareOptionsForCompletion(CompletionRequest &request, + OptionElementVector &option_vec) { + // First, we have to tell the Scripted side to set the values in its + // option store, then we call into the handle_completion passing in + // an array of the args, the arg index and the cursor position in the arg. + // We want the script side to have a chance to clear its state, so tell + // it argument parsing has started: + Options *options = GetOptions(); + // If there are not options, this will be nullptr, and in that case we + // can just skip setting the options on the scripted side: + if (options) + m_options.PrepareOptionsForCompletion(request, option_vec, &m_exe_ctx); + } + +public: + void HandleArgumentCompletion(CompletionRequest &request, + OptionElementVector &option_vec) override { + ScriptInterpreter *scripter = GetDebugger().GetScriptInterpreter(); + + if (!scripter) + return; + + // Set up the options values on the scripted side: + PrepareOptionsForCompletion(request, option_vec); + + // Now we have to make up the argument list. 
+ // The ParseForCompletion only identifies tokens in the m_parsed_line + // it doesn't remove the options leaving only the args as it does for + // the regular Parse, so we have to filter out the option ones using the + // option_element_vector: + + Options *options = GetOptions(); + auto defs = options->GetDefinitions(); + + std::unordered_set option_slots; + for (const auto &elem : option_vec) { + if (elem.opt_defs_index == -1) + continue; + option_slots.insert(elem.opt_pos); + if (defs[elem.opt_defs_index].option_has_arg) + option_slots.insert(elem.opt_arg_pos); + } + + std::vector args_vec; + Args &args = request.GetParsedLine(); + size_t num_args = args.GetArgumentCount(); + size_t cursor_idx = request.GetCursorIndex(); + size_t args_elem_pos = cursor_idx; + + for (size_t idx = 0; idx < num_args; idx++) { + if (option_slots.count(idx) == 0) + args_vec.push_back(args[idx].ref()); + else if (idx < cursor_idx) + args_elem_pos--; + } + StructuredData::DictionarySP completion_dict_sp = + scripter->HandleArgumentCompletionForScriptedCommand( + m_cmd_obj_sp, args_vec, args_elem_pos, request.GetCursorCharPos()); + + if (!completion_dict_sp) { + CommandObject::HandleArgumentCompletion(request, option_vec); + return; + } + + m_options.ProcessCompletionDict(request, completion_dict_sp); + } + bool IsRemovable() const override { return true; } ScriptedCommandSynchronicity GetSynchronicity() { return m_synchro; } diff --git a/lldb/source/Interpreter/Options.cpp b/lldb/source/Interpreter/Options.cpp index b8a3f68a49b1c..3888a5812628c 100644 --- a/lldb/source/Interpreter/Options.cpp +++ b/lldb/source/Interpreter/Options.cpp @@ -661,7 +661,9 @@ bool Options::HandleOptionCompletion(CompletionRequest &request, } else if (opt_arg_pos == request.GetCursorIndex()) { // Okay the cursor is on the completion of an argument. See if it has a - // completion, otherwise return no matches. + // completion, otherwise return no matches. Note, opt_defs_index == -1 + // means we're after an option, but that option doesn't exist. We'll + // end up treating that as an argument. Not sure we can do much better. 
if (opt_defs_index != -1) { HandleOptionArgumentCompletion(request, opt_element_vector, i, interpreter); @@ -688,7 +690,6 @@ void Options::HandleOptionArgumentCompletion( int opt_defs_index = opt_element_vector[opt_element_index].opt_defs_index; // See if this is an enumeration type option, and if so complete it here: - const auto &enum_values = opt_defs[opt_defs_index].enum_values; if (!enum_values.empty()) for (const auto &enum_value : enum_values) diff --git a/lldb/source/Plugins/ScriptInterpreter/Python/SWIGPythonBridge.h b/lldb/source/Plugins/ScriptInterpreter/Python/SWIGPythonBridge.h index 81ee9ea0a2fa1..518a478af5f6a 100644 --- a/lldb/source/Plugins/ScriptInterpreter/Python/SWIGPythonBridge.h +++ b/lldb/source/Plugins/ScriptInterpreter/Python/SWIGPythonBridge.h @@ -200,6 +200,15 @@ class SWIGBridge { LLDBSwigPythonGetRepeatCommandForScriptedCommand(PyObject *implementor, std::string &command); + static StructuredData::DictionarySP + LLDBSwigPythonHandleArgumentCompletionForScriptedCommand( + PyObject *implementor, std::vector &args_impl, + size_t args_pos, size_t pos_in_arg); + + static StructuredData::DictionarySP + LLDBSwigPythonHandleOptionArgumentCompletionForScriptedCommand( + PyObject *implementor, llvm::StringRef &long_option, size_t pos_in_arg); + static bool LLDBSwigPythonCallModuleInit(const char *python_module_name, const char *session_dictionary_name, lldb::DebuggerSP debugger); diff --git a/lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPython.cpp b/lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPython.cpp index 155efc06eaf41..db1a10e73a66a 100644 --- a/lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPython.cpp +++ b/lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPython.cpp @@ -2720,6 +2720,46 @@ ScriptInterpreterPythonImpl::GetRepeatCommandForScriptedCommand( return ret_val; } +StructuredData::DictionarySP +ScriptInterpreterPythonImpl::HandleArgumentCompletionForScriptedCommand( + StructuredData::GenericSP impl_obj_sp, std::vector &args, + size_t args_pos, size_t char_in_arg) { + StructuredData::DictionarySP completion_dict_sp; + if (!impl_obj_sp || !impl_obj_sp->IsValid()) + return completion_dict_sp; + + { + Locker py_lock(this, Locker::AcquireLock | Locker::NoSTDIN, + Locker::FreeLock); + + completion_dict_sp = + SWIGBridge::LLDBSwigPythonHandleArgumentCompletionForScriptedCommand( + static_cast(impl_obj_sp->GetValue()), args, args_pos, + char_in_arg); + } + return completion_dict_sp; +} + +StructuredData::DictionarySP +ScriptInterpreterPythonImpl::HandleOptionArgumentCompletionForScriptedCommand( + StructuredData::GenericSP impl_obj_sp, llvm::StringRef &long_option, + size_t char_in_arg) { + StructuredData::DictionarySP completion_dict_sp; + if (!impl_obj_sp || !impl_obj_sp->IsValid()) + return completion_dict_sp; + + { + Locker py_lock(this, Locker::AcquireLock | Locker::NoSTDIN, + Locker::FreeLock); + + completion_dict_sp = SWIGBridge:: + LLDBSwigPythonHandleOptionArgumentCompletionForScriptedCommand( + static_cast(impl_obj_sp->GetValue()), long_option, + char_in_arg); + } + return completion_dict_sp; +} + /// In Python, a special attribute __doc__ contains the docstring for an object /// (function, method, class, ...) if any is defined Otherwise, the attribute's /// value is None. 
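The dictionaries these two bridges hand back are consumed by
ProcessCompletionDict in CommandObjectCommands.cpp above. As a plain-Python
model of that contract (the function names here are invented):

    def process_completion_dict(completion_dict, add_completion):
        # None or empty: not handled, fall back to the default completer.
        if not completion_dict:
            return False
        # {"no-completion": True}: handled, but nothing to offer.
        if "no-completion" in completion_dict:
            return True
        # {"completion": str[, "mode": "partial"|"complete"]}: one candidate.
        if "completion" in completion_dict:
            mode = completion_dict.get("mode", "complete")
            add_completion(completion_dict["completion"], "", mode)
            return True
        # {"values": [...][, "descriptions": [...]]}: a list of candidates,
        # with an optional parallel list of descriptions.
        descriptions = completion_dict.get("descriptions", [])
        for idx, value in enumerate(completion_dict.get("values", [])):
            desc = descriptions[idx] if idx < len(descriptions) else ""
            add_completion(value, desc, "complete")
        return True

    process_completion_dict({"values": ["foo", "bar"], "descriptions": ["a", "b"]},
                            lambda value, desc, mode: print(value, desc))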
diff --git a/lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPythonImpl.h b/lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPythonImpl.h index d15e2fd76f683..2dc784777151b 100644 --- a/lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPythonImpl.h +++ b/lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPythonImpl.h @@ -166,6 +166,14 @@ class ScriptInterpreterPythonImpl : public ScriptInterpreterPython { GetRepeatCommandForScriptedCommand(StructuredData::GenericSP impl_obj_sp, Args &args) override; + StructuredData::DictionarySP HandleArgumentCompletionForScriptedCommand( + StructuredData::GenericSP impl_obj_sp, std::vector &args, + size_t args_pos, size_t char_in_arg) override; + + StructuredData::DictionarySP HandleOptionArgumentCompletionForScriptedCommand( + StructuredData::GenericSP impl_obj_sp, llvm::StringRef &long_options, + size_t char_in_arg) override; + Status GenerateFunction(const char *signature, const StringList &input, bool is_callback) override; diff --git a/lldb/test/API/commands/command/script/add/TestAddParsedCommand.py b/lldb/test/API/commands/command/script/add/TestAddParsedCommand.py index c7680e9bb7f41..6fac1eba919bc 100644 --- a/lldb/test/API/commands/command/script/add/TestAddParsedCommand.py +++ b/lldb/test/API/commands/command/script/add/TestAddParsedCommand.py @@ -68,6 +68,57 @@ def run_one_repeat(self, commands, expected_num_errors): return results + def handle_completion( + self, + cmd_str, + exp_num_completions, + exp_matches, + exp_descriptions, + match_description, + ): + matches = lldb.SBStringList() + descriptions = lldb.SBStringList() + + interp = self.dbg.GetCommandInterpreter() + num_completions = interp.HandleCompletionWithDescriptions( + cmd_str, len(cmd_str), 0, 1000, matches, descriptions + ) + self.assertEqual( + num_completions, exp_num_completions, "Number of completions is right." 
+ ) + num_matches = matches.GetSize() + self.assertEqual( + num_matches, + exp_matches.GetSize(), + "matches and expected matches of different lengths", + ) + num_descriptions = descriptions.GetSize() + if match_description: + self.assertEqual( + num_descriptions, + exp_descriptions.GetSize(), + "descriptions and expected of different lengths", + ) + + self.assertEqual( + matches.GetSize(), + num_completions + 1, + "The first element is the complete additional text", + ) + + for idx in range(0, num_matches): + match = matches.GetStringAtIndex(idx) + exp_match = exp_matches.GetStringAtIndex(idx) + self.assertEqual( + match, exp_match, f"{match} did not match expectation: {exp_match}" + ) + if match_description: + desc = descriptions.GetStringAtIndex(idx) + exp_desc = exp_descriptions.GetStringAtIndex(idx) + self.assertEqual( + desc, exp_desc, f"{desc} didn't match expectation: {exp_desc}" + ) + def pycmd_tests(self): source_dir = self.getSourceDir() test_file_path = os.path.join(source_dir, "test_commands.py") @@ -176,24 +227,10 @@ def cleanup(): descriptions = lldb.SBStringList() # First try an enum completion: - num_completions = interp.HandleCompletionWithDescriptions( - "no-args -e f", 12, 0, 1000, matches, descriptions - ) - self.assertEqual(num_completions, 1, "Only one completion for foo") - self.assertEqual( - matches.GetSize(), 2, "The first element is the complete additional text" - ) - self.assertEqual( - matches.GetStringAtIndex(0), "oo ", "And we got the right extra characters" - ) - self.assertEqual( - matches.GetStringAtIndex(1), "foo", "And we got the right match" - ) - self.assertEqual( - descriptions.GetSize(), 2, "descriptions matche the return length" - ) - # FIXME: we don't return descriptions for enum elements - # self.assertEqual(descriptions.GetStringAtIndex(1), "does foo things", "And we got the right description") + # Note - this is an enum so all the values are returned: + matches.AppendList(["oo ", "foo"], 2) + + self.handle_completion("no-args -e f", 1, matches, descriptions, False) # Now try an internal completer, the on disk file one is handy: partial_name = os.path.join(source_dir, "test_") @@ -201,24 +238,9 @@ def cleanup(): matches.Clear() descriptions.Clear() - num_completions = interp.HandleCompletionWithDescriptions( - cmd_str, len(cmd_str) - 1, 0, 1000, matches, descriptions - ) - self.assertEqual(num_completions, 1, "Only one completion for source file") - self.assertEqual(matches.GetSize(), 2, "The first element is the complete line") - self.assertEqual( - matches.GetStringAtIndex(0), - "commands.py' ", - "And we got the right extra characters", - ) - self.assertEqual( - matches.GetStringAtIndex(1), test_file_path, "And we got the right match" - ) - self.assertEqual( - descriptions.GetSize(), 2, "descriptions match the return length" - ) - # FIXME: we don't return descriptions for enum elements - # self.assertEqual(descriptions.GetStringAtIndex(1), "does foo things", "And we got the right description") + matches.AppendList(["commands.py' ", test_file_path], 2) + # We don't have descriptions for the file path completer: + self.handle_completion(cmd_str, 1, matches, descriptions, False) # Try a command with arguments. # FIXME: It should be enough to define an argument and it's type to get the completer @@ -231,6 +253,44 @@ def cleanup(): substrs=["0: First Argument", "1: Second Argument"], ) + # Now test custom completions - two-args has both option and arg completers. 
In both + # completers we return different values if the -p option is set, so we can test that too: + matches.Clear() + descriptions.Clear() + cmd_str = "two-args -p something -c other_" + matches.AppendString("something ") + matches.AppendString("other_something") + # This is a full match so no descriptions: + self.handle_completion(cmd_str, 1, matches, descriptions, False) + + matches.Clear() + descriptions.Clear() + cmd_str = "two-args -c other_" + matches.AppendList(["", "other_nice", "other_not_nice", "other_mediocre"], 4) + # The option doesn't return descriptions either: + self.handle_completion(cmd_str, 3, matches, descriptions, False) + + # Now try the argument - it says "no completions" if the proc_name was set: + matches.Clear() + descriptions.Clear() + cmd_str = "two-args -p something arg" + matches.AppendString("") + self.handle_completion(cmd_str, 0, matches, descriptions, False) + + cmd_str = "two-args arg_" + matches.Clear() + descriptions.Clear() + matches.AppendList(["", "arg_cool", "arg_yuck"], 3) + descriptions.AppendList(["", "good idea", "bad idea"], 3) + self.handle_completion(cmd_str, 2, matches, descriptions, True) + + # This one gets a single unique match: + cmd_str = "two-args correct_" + matches.Clear() + descriptions.Clear() + matches.AppendList(["answer ", "correct_answer"], 2) + self.handle_completion(cmd_str, 1, matches, descriptions, False) + # Now make sure get_repeat_command works properly: # no-args turns off auto-repeat diff --git a/lldb/test/API/commands/command/script/add/test_commands.py b/lldb/test/API/commands/command/script/add/test_commands.py index fcde6cd3ef6dc..b15ea935c0586 100644 --- a/lldb/test/API/commands/command/script/add/test_commands.py +++ b/lldb/test/API/commands/command/script/add/test_commands.py @@ -18,7 +18,7 @@ def __call__(self, debugger, args_array, exe_ctx, result): for long_option, elem in opt_def.items(): dest = elem["dest"] result.AppendMessage( - f"{long_option} (set: {elem['_value_set']}): {object.__getattribute__(self.ov_parser, dest)}\n" + f"{long_option} (set: {elem['_value_set']}): {object.__getattribute__(self.get_parser(), dest)}\n" ) else: result.AppendMessage("No options\n") @@ -31,7 +31,6 @@ def __call__(self, debugger, args_array, exe_ctx, result): f"{idx}: {args_array.GetItemAtIndex(idx).GetStringValue(10000)}\n" ) - # Use these to make sure that get_repeat_command sends the right # command. 
no_args_repeat = None @@ -49,7 +48,8 @@ def register_lldb_command(cls, debugger, module_name): ParsedCommand.do_register_cmd(cls, debugger, module_name) def setup_command_definition(self): - self.ov_parser.add_option( + ov_parser = self.get_parser() + ov_parser.add_option( "b", "bool-arg", "a boolean arg, defaults to True", @@ -59,7 +59,7 @@ def setup_command_definition(self): default=True, ) - self.ov_parser.add_option( + ov_parser.add_option( "s", "shlib-name", "A shared library name.", @@ -69,7 +69,7 @@ def setup_command_definition(self): default=None, ) - self.ov_parser.add_option( + ov_parser.add_option( "d", "disk-file-name", "An on disk filename", @@ -78,7 +78,7 @@ def setup_command_definition(self): default=None, ) - self.ov_parser.add_option( + ov_parser.add_option( "l", "line-num", "A line number", @@ -88,7 +88,7 @@ def setup_command_definition(self): default=0, ) - self.ov_parser.add_option( + ov_parser.add_option( "e", "enum-option", "An enum, doesn't actually do anything", @@ -126,8 +126,9 @@ def register_lldb_command(cls, debugger, module_name): ParsedCommand.do_register_cmd(cls, debugger, module_name) def setup_command_definition(self): - self.ov_parser.add_argument_set( - [self.ov_parser.make_argument_element(lldb.eArgTypeSourceFile, "plain")] + ov_parser = self.get_parser() + ov_parser.add_argument_set( + [ov_parser.make_argument_element(lldb.eArgTypeSourceFile, "plain")] ) def get_repeat_command(self, command): @@ -154,7 +155,8 @@ def register_lldb_command(cls, debugger, module_name): ParsedCommand.do_register_cmd(cls, debugger, module_name) def setup_command_definition(self): - self.ov_parser.add_option( + ov_parser = self.get_parser() + ov_parser.add_option( "l", "language", "language defaults to None", @@ -164,7 +166,7 @@ def setup_command_definition(self): default=None, ) - self.ov_parser.add_option( + ov_parser.add_option( "c", "log-channel", "log channel - defaults to lldb", @@ -174,7 +176,7 @@ def setup_command_definition(self): default="lldb", ) - self.ov_parser.add_option( + ov_parser.add_option( "p", "process-name", "A process name, defaults to None", @@ -183,25 +185,23 @@ def setup_command_definition(self): default=None, ) - self.ov_parser.add_argument_set( + ov_parser.add_argument_set( [ - self.ov_parser.make_argument_element( + ov_parser.make_argument_element( lldb.eArgTypeClassName, "plain", [1, 2] ), - self.ov_parser.make_argument_element( + ov_parser.make_argument_element( lldb.eArgTypeOffset, "optional", [1, 2] ), ] ) - self.ov_parser.add_argument_set( + ov_parser.add_argument_set( [ - self.ov_parser.make_argument_element( + ov_parser.make_argument_element( lldb.eArgTypePythonClass, "plain", [3, 4] ), - self.ov_parser.make_argument_element( - lldb.eArgTypePid, "optional", [3, 4] - ), + ov_parser.make_argument_element(lldb.eArgTypePid, "optional", [3, 4]), ] ) @@ -210,6 +210,35 @@ def get_repeat_command(self, command): two_arg_repeat = command return command + " THIRD_ARG" + def handle_option_argument_completion(self, long_option, cursor_pos): + ov_parser = self.get_parser() + value = ov_parser.dest_for_option(long_option)[0 : cursor_pos + 1] + proc_value = ov_parser.proc_name + if proc_value != None: + new_str = value + proc_value + ret_arr = {"completion": new_str, "mode": "partial"} + return ret_arr + + ret_arr = {"values": [value + "nice", value + "not_nice", value + "mediocre"]} + return ret_arr + + def handle_argument_completion(self, args, arg_pos, cursor_pos): + ov_parser = self.get_parser() + orig_arg = args[arg_pos][0:cursor_pos] + if orig_arg == 
"correct_": + ret_arr = {"completion": "correct_answer"} + return ret_arr + + if ov_parser.was_set("process-name"): + # No completions if proc_name was set. + return True + + ret_arr = { + "values": [orig_arg + "cool", orig_arg + "yuck"], + "descriptions": ["good idea", "bad idea"], + } + return ret_arr + def get_short_help(self): return "This is my short help string" diff --git a/lldb/unittests/ScriptInterpreter/Python/PythonTestSuite.cpp b/lldb/unittests/ScriptInterpreter/Python/PythonTestSuite.cpp index c67a2b4bf46e6..3faeb587c3a91 100644 --- a/lldb/unittests/ScriptInterpreter/Python/PythonTestSuite.cpp +++ b/lldb/unittests/ScriptInterpreter/Python/PythonTestSuite.cpp @@ -211,6 +211,19 @@ LLDBSwigPythonGetRepeatCommandForScriptedCommand(PyObject *implementor, return std::nullopt; } +StructuredData::DictionarySP +LLDBSwigPythonHandleArgumentCompletionForScriptedCommand( + PyObject *implementor, std::vector &args, size_t args_pos, + size_t pos_in_arg) { + return {}; +} + +StructuredData::DictionarySP +LLDBSwigPythonHandleOptionArgumentCompletionForScriptedCommand( + PyObject *implementor, llvm::StringRef &long_options, size_t char_in_arg) { + return {}; +} + bool lldb_private::python::SWIGBridge::LLDBSwigPythonCallModuleInit( const char *python_module_name, const char *session_dictionary_name, lldb::DebuggerSP debugger) { From 090755234e5033de67be994e99e31f4d13dcdcc5 Mon Sep 17 00:00:00 2001 From: Chris Apple Date: Tue, 24 Sep 2024 10:01:12 -0700 Subject: [PATCH 025/212] [rtsan] Refactor initialization state to prevent hangs (#109830) Move to tri state (Uninitialized, Initialized, Initializing) for our init state. We currently just have 2 (Uninitialized and Initialized). --- compiler-rt/lib/rtsan/rtsan.cpp | 29 ++++++++++++++++++++++++----- 1 file changed, 24 insertions(+), 5 deletions(-) diff --git a/compiler-rt/lib/rtsan/rtsan.cpp b/compiler-rt/lib/rtsan/rtsan.cpp index b288da64ffbe2..e6d2481b2c2a3 100644 --- a/compiler-rt/lib/rtsan/rtsan.cpp +++ b/compiler-rt/lib/rtsan/rtsan.cpp @@ -22,11 +22,25 @@ using namespace __rtsan; using namespace __sanitizer; +namespace { +enum class InitializationState : u8 { + Uninitialized, + Initializing, + Initialized, +}; +} // namespace + static StaticSpinMutex rtsan_inited_mutex; static atomic_uint8_t rtsan_initialized = {0}; -static void SetInitialized() { - atomic_store(&rtsan_initialized, 1, memory_order_release); +static void SetInitializationState(InitializationState state) { + atomic_store(&rtsan_initialized, static_cast(state), + memory_order_release); +} + +static InitializationState GetInitializationState() { + return static_cast( + atomic_load(&rtsan_initialized, memory_order_acquire)); } static auto PrintDiagnosticsAndDieAction(DiagnosticsInfo info) { @@ -39,13 +53,14 @@ static auto PrintDiagnosticsAndDieAction(DiagnosticsInfo info) { extern "C" { SANITIZER_INTERFACE_ATTRIBUTE void __rtsan_init() { - CHECK(!__rtsan_is_initialized()); + CHECK(GetInitializationState() == InitializationState::Uninitialized); + SetInitializationState(InitializationState::Initializing); SanitizerToolName = "RealtimeSanitizer"; InitializeFlags(); InitializeInterceptors(); - SetInitialized(); + SetInitializationState(InitializationState::Initialized); } SANITIZER_INTERFACE_ATTRIBUTE void __rtsan_ensure_initialized() { @@ -62,7 +77,7 @@ SANITIZER_INTERFACE_ATTRIBUTE void __rtsan_ensure_initialized() { } SANITIZER_INTERFACE_ATTRIBUTE bool __rtsan_is_initialized() { - return atomic_load(&rtsan_initialized, memory_order_acquire) == 1; + return GetInitializationState() 
== InitializationState::Initialized; } SANITIZER_INTERFACE_ATTRIBUTE void __rtsan_realtime_enter() { @@ -83,6 +98,10 @@ SANITIZER_INTERFACE_ATTRIBUTE void __rtsan_enable() { SANITIZER_INTERFACE_ATTRIBUTE void __rtsan_notify_intercepted_call(const char *func_name) { + // While initializing, we need all intercepted functions to behave normally + if (GetInitializationState() == InitializationState::Initializing) + return; + __rtsan_ensure_initialized(); GET_CALLER_PC_BP; ExpectNotRealtime( From f4042077e2e3946ee35c1df8cab8237de6086480 Mon Sep 17 00:00:00 2001 From: vporpo Date: Tue, 24 Sep 2024 10:12:12 -0700 Subject: [PATCH 026/212] [SandboxIR] Implement a few Instruction member functions (#109820) This patch implements a few sandboxir::Instruction predicate functions. --- llvm/include/llvm/SandboxIR/SandboxIR.h | 16 ++++++++++++++++ llvm/unittests/SandboxIR/SandboxIRTest.cpp | 13 +++++++++++++ 2 files changed, 29 insertions(+) diff --git a/llvm/include/llvm/SandboxIR/SandboxIR.h b/llvm/include/llvm/SandboxIR/SandboxIR.h index d5e239e70da61..d4c907ce8327d 100644 --- a/llvm/include/llvm/SandboxIR/SandboxIR.h +++ b/llvm/include/llvm/SandboxIR/SandboxIR.h @@ -1935,6 +1935,22 @@ class Instruction : public sandboxir::User { /// \Returns this Instruction's opcode. Note that SandboxIR has its own opcode /// state to allow for new SandboxIR-specific instructions. Opcode getOpcode() const { return Opc; } + + // TODO: Missing function getOpcodeName(). + + bool isTerminator() const { + return cast(Val)->isTerminator(); + } + bool isUnaryOp() const { return cast(Val)->isUnaryOp(); } + bool isBinaryOp() const { return cast(Val)->isBinaryOp(); } + bool isIntDivRem() const { + return cast(Val)->isIntDivRem(); + } + bool isShift() const { return cast(Val)->isShift(); } + bool isCast() const { return cast(Val)->isCast(); } + + // TODO: More missing functions + /// Detach this from its parent BasicBlock without deleting it. void removeFromParent(); /// Detach this Value from its parent and delete it. diff --git a/llvm/unittests/SandboxIR/SandboxIRTest.cpp b/llvm/unittests/SandboxIR/SandboxIRTest.cpp index 1fcc9cbea152c..42df09609b675 100644 --- a/llvm/unittests/SandboxIR/SandboxIRTest.cpp +++ b/llvm/unittests/SandboxIR/SandboxIRTest.cpp @@ -1769,6 +1769,7 @@ define void @foo(i8 %v1, ptr %ptr) { store volatile i8 %ld0, ptr %ptr %atomicrmw = atomicrmw add ptr %ptr, i8 %v1 acquire %udiv = udiv i8 %ld0, %v1 + %urem = urem i8 %ld0, %v1 call void @foo() ret void } @@ -1861,6 +1862,18 @@ define void @foo(i8 %v1, ptr %ptr) { for (auto &LLVMI : *LLVMBB1) { auto &I = cast(*Ctx.getValue(&LLVMI)); + // Check isTerminator(). + EXPECT_EQ(LLVMI.isTerminator(), I.isTerminator()); + // Check isUnaryOp(). + EXPECT_EQ(LLVMI.isUnaryOp(), I.isUnaryOp()); + // Check isBinaryOp(). + EXPECT_EQ(LLVMI.isBinaryOp(), I.isBinaryOp()); + // Check isIntDivRem(). + EXPECT_EQ(LLVMI.isIntDivRem(), I.isIntDivRem()); + // Check isShift(). + EXPECT_EQ(LLVMI.isShift(), I.isShift()); + // Check isCast(). + EXPECT_EQ(LLVMI.isCast(), I.isCast()); // Check isAssociative(). EXPECT_EQ(LLVMI.isAssociative(), I.isAssociative()); // Check isCommutative(). 
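A note on the RealtimeSanitizer change above (patch 025): the hang it guards
against comes from interceptors that fire while __rtsan_init is still running,
since flag parsing and interceptor setup can themselves call intercepted
functions. With only two states, such a call re-enters initialization and
deadlocks; the third Initializing state lets it pass straight through. A rough
Python model of the control flow (names invented, no locking shown):

    from enum import Enum

    class InitState(Enum):
        UNINITIALIZED = 0
        INITIALIZING = 1
        INITIALIZED = 2

    state = InitState.UNINITIALIZED

    def intercepted_call(name):
        # While initializing, behave like the real function and skip checks.
        if state is InitState.INITIALIZING:
            return
        ensure_initialized()
        # ... real-time-safety checks would run here ...

    def rtsan_init():
        global state
        state = InitState.INITIALIZING
        intercepted_call("malloc")  # init itself may hit an interceptor
        state = InitState.INITIALIZED

    def ensure_initialized():
        if state is InitState.UNINITIALIZED:
            rtsan_init()

    ensure_initialized()
    assert state is InitState.INITIALIZED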
From 234193bae6cf8b19703c6e543b100517bb99a9f7 Mon Sep 17 00:00:00 2001 From: Nirvedh Meshram <96096277+nirvedhmeshram@users.noreply.github.com> Date: Tue, 24 Sep 2024 12:24:59 -0500 Subject: [PATCH 027/212] [mlir][linalg] Vectorization support for convolution of i1 type (#109480) Normally convolutions present with the following linalg op region ``` ^bb0(%arg14: i4, %arg15: i4, %arg16: i4): %17 = arith.muli %arg14, %arg15 : i4 %18 = arith.addi %arg16, %17 : i4 linalg.yield %18 : i4 ``` However, for i1 due to strength reduction we get something like ``` ^bb0(%arg14: i1, %arg15: i1, %arg16: i1): %17 = arith.andi %arg14, %arg15 : i1 %18 = arith.ori %arg16, %17 : i1 linalg.yield %18 : i1 ``` This PR updates the logic to support this region for i1 types. --- .../Linalg/Transforms/Vectorization.cpp | 20 ++++-- .../Dialect/Linalg/vectorize-convolution.mlir | 65 +++++++++++++++++++ 2 files changed, 81 insertions(+), 4 deletions(-) diff --git a/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp b/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp index c332307da4d33..fa20001f66182 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp @@ -2987,10 +2987,15 @@ struct Conv1DGenerator if (!setOperKind(reduceOp)) return; auto maybeKind = getCombinerOpKind(reduceOp); - if (!maybeKind || (*maybeKind != vector::CombiningKind::ADD && + // Typically convolution will have a `Add` CombiningKind but for i1 type it + // can get strength reduced to `OR` which is also supported. This strength + // reduction logic is in `buildBinaryFn` helper in the Linalg dialect. + if (!maybeKind || ((*maybeKind != vector::CombiningKind::ADD && + *maybeKind != vector::CombiningKind::OR) && (oper != Pool || !isSupportedPoolKind(*maybeKind)))) { return; } + reductionKind = maybeKind.value(); auto rhsRank = rhsShapedType.getRank(); switch (oper) { @@ -3273,10 +3278,12 @@ struct Conv1DGenerator bindDims(ctx, n, w, f, c); lhs = promote(rewriter, loc, lhs, res.getType()); rhs = promote(rewriter, loc, rhs, res.getType()); - return rewriter.create( + auto contrationOp = rewriter.create( loc, lhs, rhs, res, /*indexingMaps=*/MapList{{n, w, c}, {c, f}, {n, w, f}}, /*iteratorTypes=*/ArrayRef{par, par, par, red}); + contrationOp.setKind(reductionKind); + return contrationOp; } // Create an outerproduct: lhs{w} * rhs{1} -> res{w} for single channel @@ -3666,6 +3673,7 @@ struct Conv1DGenerator int strideW, dilationW; Value lhsShaped, rhsShaped, resShaped; ShapedType lhsShapedType, rhsShapedType, resShapedType; + vector::CombiningKind reductionKind; // Sets oper, poolExtOp and isPoolExt for valid conv/pooling ops. // Returns true iff it is a valid conv/pooling op. @@ -3681,7 +3689,9 @@ struct Conv1DGenerator switch (numBlockArguments) { case 1: { // Will be convolution if feeder is a MulOp. - // Otherwise, if it can be pooling. + // A strength reduced version of MulOp for i1 type is AndOp which is also + // supported. Otherwise, it can be pooling. This strength reduction logic + // is in `buildBinaryFn` helper in the Linalg dialect. 
auto feedValIt = llvm::find_if_not(reduceOp->getOperands(), llvm::IsaPred); Operation *feedOp = (*feedValIt).getDefiningOp(); @@ -3689,7 +3699,9 @@ struct Conv1DGenerator oper = Pool; isPoolExt = true; poolExtOp = feedOp->getName().getIdentifier(); - } else if (!(isa(feedOp) && + } else if (!((isa(feedOp) || + (isa(feedOp) && + feedOp->getResultTypes()[0].isInteger(1))) && llvm::all_of(feedOp->getOperands(), [](Value v) { if (isa(v)) return true; diff --git a/mlir/test/Dialect/Linalg/vectorize-convolution.mlir b/mlir/test/Dialect/Linalg/vectorize-convolution.mlir index 93e36a69567bd..7f4b9b986c81b 100644 --- a/mlir/test/Dialect/Linalg/vectorize-convolution.mlir +++ b/mlir/test/Dialect/Linalg/vectorize-convolution.mlir @@ -39,6 +39,7 @@ func.func @conv1d_nwc_4x2x8_memref(%input: memref<4x6x3xf32>, %filter: memref<1x // CHECK: %[[CONTRACT_0:.+]] = vector.contract { // CHECK-SAME: indexing_maps = [#[[INPUT_MAP]], #[[FILTER_MAP]], #[[OUTPUT_MAP]]], // CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "reduction"] +// CHECK-SAME: kind = #vector.kind // CHECK-SAME: %[[V_INPUT_0]], %[[V_FILTER]], %[[V_OUTPUT_0]] // CHECK-SAME: : vector<4x1x3xf32>, vector<3x8xf32> into vector<4x1x8xf32> @@ -46,6 +47,7 @@ func.func @conv1d_nwc_4x2x8_memref(%input: memref<4x6x3xf32>, %filter: memref<1x // CHECK: %[[CONTRACT_1:.+]] = vector.contract { // CHECK-SAME: indexing_maps = [#[[INPUT_MAP]], #[[FILTER_MAP]], #[[OUTPUT_MAP]]], // CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "reduction"] +// CHECK-SAME: kind = #vector.kind // CHECK-SAME: %[[V_INPUT_1]], %[[V_FILTER]], %[[V_OUTPUT_1]] // CHECK-SAME: : vector<4x1x3xf32>, vector<3x8xf32> into vector<4x1x8xf32> @@ -61,6 +63,36 @@ func.func @conv1d_nwc_4x2x8_memref(%input: memref<4x6x3xf32>, %filter: memref<1x // ----- +// This test is same as above but for i1 type with the only difference being that +// the combining kind for `vector.contract` is `OR`. +func.func @conv1d_nwc_4x2x8_memref_i1(%input: memref<4x6x3xi1>, %filter: memref<1x3x8xi1>, %output: memref<4x2x8xi1>) { + linalg.conv_1d_nwc_wcf + {dilations = dense<1> : tensor<1xi64>, strides = dense<3> : tensor<1xi64>} + ins(%input, %filter : memref<4x6x3xi1>, memref<1x3x8xi1>) + outs(%output : memref<4x2x8xi1>) + return +} +// CHECK: #[[INPUT_MAP:.+]] = affine_map<(d0, d1, d2, d3) -> (d0, d1, d3)> +// CHECK: #[[FILTER_MAP:.+]] = affine_map<(d0, d1, d2, d3) -> (d3, d2)> +// CHECK: #[[OUTPUT_MAP:.+]] = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2)> + +// CHECK: func @conv1d_nwc_4x2x8_memref_i1 +/// w == 0, kw == 0 +// CHECK: %[[CONTRACT_0:.+]] = vector.contract { +// CHECK-SAME: indexing_maps = [#[[INPUT_MAP]], #[[FILTER_MAP]], #[[OUTPUT_MAP]]], +// CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "reduction"] +// CHECK-SAME: kind = #vector.kind +// CHECK-SAME: : vector<4x1x3xi1>, vector<3x8xi1> into vector<4x1x8xi1> + +/// w == 1, kw == 0 +// CHECK: %[[CONTRACT_1:.+]] = vector.contract { +// CHECK-SAME: indexing_maps = [#[[INPUT_MAP]], #[[FILTER_MAP]], #[[OUTPUT_MAP]]], +// CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "reduction"] +// CHECK-SAME: kind = #vector.kind +// CHECK-SAME: : vector<4x1x3xi1>, vector<3x8xi1> into vector<4x1x8xi1> + +// ----- + // The i8i8i32 case is similar to f32 case, so checking one case is enough for // test coverage. 
func.func @conv1d_nwc_4x2x8_i8i8i32_memref(%input: memref<4x6x3xi8>, %filter: memref<1x3x8xi8>, %output: memref<4x2x8xi32>) { @@ -299,6 +331,7 @@ func.func @conv1d_ncw_4x8x2_memref(%input: memref<4x3x6xf32>, %filter: memref<8x // CHECK: %[[CONTRACT_0:.+]] = vector.contract { // CHECK-SAME: indexing_maps = [#[[INPUT_MAP]], #[[FILTER_MAP]], #[[OUTPUT_MAP]]], // CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "reduction"] +// CHECK-SAME: kind = #vector.kind // CHECK-SAME: %[[V_INPUT_0]], %[[V_FILTER]], %[[V_OUTPUT_0]] // CHECK-SAME: : vector<4x1x3xf32>, vector<3x8xf32> into vector<4x1x8xf32> @@ -306,6 +339,7 @@ func.func @conv1d_ncw_4x8x2_memref(%input: memref<4x3x6xf32>, %filter: memref<8x // CHECK: %[[CONTRACT_1:.+]] = vector.contract { // CHECK-SAME: indexing_maps = [#[[INPUT_MAP]], #[[FILTER_MAP]], #[[OUTPUT_MAP]]], // CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "reduction"] +// CHECK-SAME: kind = #vector.kind // CHECK-SAME: %[[V_INPUT_1]], %[[V_FILTER]], %[[V_OUTPUT_1]] // CHECK-SAME: : vector<4x1x3xf32>, vector<3x8xf32> into vector<4x1x8xf32> @@ -324,6 +358,37 @@ func.func @conv1d_ncw_4x8x2_memref(%input: memref<4x3x6xf32>, %filter: memref<8x // ----- +// This test is same as above but for i1 type with the only difference being that +// the combining kind for `vector.contract` is `OR`. +func.func @conv1d_ncw_4x8x2_memref_i1(%input: memref<4x3x6xi1>, %filter: memref<8x3x1xi1>, %output: memref<4x8x2xi1>) { + linalg.conv_1d_ncw_fcw + {dilations = dense<1> : tensor<1xi64>, strides = dense<3> : tensor<1xi64>} + ins(%input, %filter : memref<4x3x6xi1>, memref<8x3x1xi1>) + outs(%output : memref<4x8x2xi1>) + return +} + +// CHECK: #[[INPUT_MAP:.+]] = affine_map<(d0, d1, d2, d3) -> (d0, d1, d3)> +// CHECK: #[[FILTER_MAP:.+]] = affine_map<(d0, d1, d2, d3) -> (d3, d2)> +// CHECK: #[[OUTPUT_MAP:.+]] = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2)> + +// CHECK: func @conv1d_ncw_4x8x2_memref_i1 +/// w == 0, kw == 0 +// CHECK: vector.contract { +// CHECK-SAME: indexing_maps = [#[[INPUT_MAP]], #[[FILTER_MAP]], #[[OUTPUT_MAP]]], +// CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "reduction"] +// CHECK-SAME: kind = #vector.kind +// CHECK-SAME: : vector<4x1x3xi1>, vector<3x8xi1> into vector<4x1x8xi1> + +/// w == 1, kw == 0 +// CHECK: vector.contract { +// CHECK-SAME: indexing_maps = [#[[INPUT_MAP]], #[[FILTER_MAP]], #[[OUTPUT_MAP]]], +// CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "reduction"] +// CHECK-SAME: kind = #vector.kind +// CHECK-SAME: : vector<4x1x3xi1>, vector<3x8xi1> into vector<4x1x8xi1> + +// ----- + func.func @conv1d_ncw_4x8x2_memref(%input: memref<4x3x6xf32>, %filter: memref<8x3x2xf32>, %output: memref<4x8x2xf32>) { linalg.conv_1d_ncw_fcw {dilations = dense<2> : tensor<1xi64>, strides = dense<3> : tensor<1xi64>} From 26029d77a57cb4aaa1479064109e985a90d0edd8 Mon Sep 17 00:00:00 2001 From: Tex Riddell Date: Tue, 24 Sep 2024 10:42:26 -0700 Subject: [PATCH 028/212] [DirectX] Add atan2 intrinsic and expand for DXIL backend (p1) (#108865) This change is part of this proposal: https://discourse.llvm.org/t/rfc-all-the-math-intrinsics/78294 This preliminary work adds the intrinsic to llvm and expands using atan intrinsic for DXIL backend, since DXIL has no atan2 op. Part 1 for Implement the atan2 HLSL Function #70096. 
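The expansion below reconstructs atan2 from atan using the standard case split
(see https://en.wikipedia.org/wiki/Atan2). A quick Python rendering of the
same logic, checked against libm; like the DXIL expansion, it yields pi/2 for
the (0, 0) corner rather than libm's 0, so that case is left out of the checks:

    import math

    def atan2_from_atan(y, x):
        # Mirrors the compare-and-select chain in expandAtan2Intrinsic.
        if x > 0:
            return math.atan(y / x)
        if x < 0 and y >= 0:
            return math.atan(y / x) + math.pi
        if x < 0 and y < 0:
            return math.atan(y / x) - math.pi
        if x == 0 and y < 0:
            return -math.pi / 2
        return math.pi / 2  # x == 0, y >= 0

    for y, x in [(1.0, 2.0), (1.0, -2.0), (-1.0, -2.0), (-1.0, 0.0), (1.0, 0.0)]:
        assert math.isclose(atan2_from_atan(y, x), math.atan2(y, x))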
--- llvm/docs/LangRef.rst | 37 ++++++++ llvm/include/llvm/IR/Intrinsics.td | 1 + .../Target/DirectX/DXILIntrinsicExpansion.cpp | 52 +++++++++++ llvm/test/CodeGen/DirectX/atan2.ll | 87 +++++++++++++++++++ llvm/test/CodeGen/DirectX/atan2_error.ll | 11 +++ 5 files changed, 188 insertions(+) create mode 100644 llvm/test/CodeGen/DirectX/atan2.ll create mode 100644 llvm/test/CodeGen/DirectX/atan2_error.ll diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst index 91c3e60bb0acb..41d1efab752fd 100644 --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -15583,6 +15583,43 @@ trapping or setting ``errno``. When specified with the fast-math-flag 'afn', the result may be approximated using a less accurate calculation. +'``llvm.atan2.*``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +This is an overloaded intrinsic. You can use ``llvm.atan2`` on any +floating-point or vector of floating-point type. Not all targets support +all types however. + +:: + + declare float @llvm.atan2.f32(float %X, float %Y) + declare double @llvm.atan2.f64(double %X, double %Y) + declare x86_fp80 @llvm.atan2.f80(x86_fp80 %X, x86_fp80 %Y) + declare fp128 @llvm.atan2.f128(fp128 %X, fp128 %Y) + declare ppc_fp128 @llvm.atan2.ppcf128(ppc_fp128 %X, ppc_fp128 %Y) + +Overview: +""""""""" + +The '``llvm.atan2.*``' intrinsics return the arctangent of the operand. + +Arguments: +"""""""""" + +The arguments and return value are floating-point numbers of the same type. + +Semantics: +"""""""""" + +Return the same value as a corresponding libm '``atan2``' function but without +trapping or setting ``errno``. + +When specified with the fast-math-flag 'afn', the result may be approximated +using a less accurate calculation. + '``llvm.sinh.*``' Intrinsic ^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td index 0a74a217a5f01..48d57907e6d0b 100644 --- a/llvm/include/llvm/IR/Intrinsics.td +++ b/llvm/include/llvm/IR/Intrinsics.td @@ -1016,6 +1016,7 @@ let IntrProperties = [IntrNoMem, IntrSpeculatable, IntrWillReturn] in { def int_asin : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>; def int_acos : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>; def int_atan : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>; + def int_atan2 : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, LLVMMatchType<0>]>; def int_sin : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>; def int_cos : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>; def int_tan : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>; diff --git a/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp b/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp index dd73b895b14d3..926cbe97f24fd 100644 --- a/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp +++ b/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp @@ -36,6 +36,7 @@ using namespace llvm; static bool isIntrinsicExpansion(Function &F) { switch (F.getIntrinsicID()) { case Intrinsic::abs: + case Intrinsic::atan2: case Intrinsic::exp: case Intrinsic::log: case Intrinsic::log10: @@ -307,6 +308,54 @@ static Value *expandNormalizeIntrinsic(CallInst *Orig) { return Builder.CreateFMul(X, MultiplicandVec); } +static Value *expandAtan2Intrinsic(CallInst *Orig) { + Value *Y = Orig->getOperand(0); + Value *X = Orig->getOperand(1); + Type *Ty = X->getType(); + IRBuilder<> Builder(Orig); + Builder.setFastMathFlags(Orig->getFastMathFlags()); + + Value *Tan = Builder.CreateFDiv(Y, 
X); + + CallInst *Atan = + Builder.CreateIntrinsic(Ty, Intrinsic::atan, {Tan}, nullptr, "Elt.Atan"); + Atan->setTailCall(Orig->isTailCall()); + Atan->setAttributes(Orig->getAttributes()); + + // Modify atan result based on https://en.wikipedia.org/wiki/Atan2. + Constant *Pi = ConstantFP::get(Ty, llvm::numbers::pi); + Constant *HalfPi = ConstantFP::get(Ty, llvm::numbers::pi / 2); + Constant *NegHalfPi = ConstantFP::get(Ty, -llvm::numbers::pi / 2); + Constant *Zero = ConstantFP::get(Ty, 0); + Value *AtanAddPi = Builder.CreateFAdd(Atan, Pi); + Value *AtanSubPi = Builder.CreateFSub(Atan, Pi); + + // x > 0 -> atan. + Value *Result = Atan; + Value *XLt0 = Builder.CreateFCmpOLT(X, Zero); + Value *XEq0 = Builder.CreateFCmpOEQ(X, Zero); + Value *YGe0 = Builder.CreateFCmpOGE(Y, Zero); + Value *YLt0 = Builder.CreateFCmpOLT(Y, Zero); + + // x < 0, y >= 0 -> atan + pi. + Value *XLt0AndYGe0 = Builder.CreateAnd(XLt0, YGe0); + Result = Builder.CreateSelect(XLt0AndYGe0, AtanAddPi, Result); + + // x < 0, y < 0 -> atan - pi. + Value *XLt0AndYLt0 = Builder.CreateAnd(XLt0, YLt0); + Result = Builder.CreateSelect(XLt0AndYLt0, AtanSubPi, Result); + + // x == 0, y < 0 -> -pi/2 + Value *XEq0AndYLt0 = Builder.CreateAnd(XEq0, YLt0); + Result = Builder.CreateSelect(XEq0AndYLt0, NegHalfPi, Result); + + // x == 0, y > 0 -> pi/2 + Value *XEq0AndYGe0 = Builder.CreateAnd(XEq0, YGe0); + Result = Builder.CreateSelect(XEq0AndYGe0, HalfPi, Result); + + return Result; +} + static Value *expandPowIntrinsic(CallInst *Orig) { Value *X = Orig->getOperand(0); @@ -418,6 +467,9 @@ static bool expandIntrinsic(Function &F, CallInst *Orig) { case Intrinsic::abs: Result = expandAbs(Orig); break; + case Intrinsic::atan2: + Result = expandAtan2Intrinsic(Orig); + break; case Intrinsic::exp: Result = expandExpIntrinsic(Orig); break; diff --git a/llvm/test/CodeGen/DirectX/atan2.ll b/llvm/test/CodeGen/DirectX/atan2.ll new file mode 100644 index 0000000000000..9d86f87f3ed50 --- /dev/null +++ b/llvm/test/CodeGen/DirectX/atan2.ll @@ -0,0 +1,87 @@ +; RUN: opt -S -dxil-intrinsic-expansion -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s --check-prefixes=CHECK,EXPCHECK +; RUN: opt -S -dxil-intrinsic-expansion -scalarizer -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s --check-prefixes=CHECK,DOPCHECK + +; Make sure correct dxil expansions for atan2 are generated for float and half. 
+ +define noundef float @atan2_float(float noundef %y, float noundef %x) { +entry: +; CHECK: [[DIV:%.+]] = fdiv float %y, %x +; EXPCHECK: [[ATAN:%.+]] = call float @llvm.atan.f32(float [[DIV]]) +; DOPCHECK: [[ATAN:%.+]] = call float @dx.op.unary.f32(i32 17, float [[DIV]]) +; CHECK-DAG: [[ADD_PI:%.+]] = fadd float [[ATAN]], 0x400921FB60000000 +; CHECK-DAG: [[SUB_PI:%.+]] = fsub float [[ATAN]], 0x400921FB60000000 +; CHECK-DAG: [[X_LT_0:%.+]] = fcmp olt float %x, 0.000000e+00 +; CHECK-DAG: [[X_EQ_0:%.+]] = fcmp oeq float %x, 0.000000e+00 +; CHECK-DAG: [[Y_GE_0:%.+]] = fcmp oge float %y, 0.000000e+00 +; CHECK-DAG: [[Y_LT_0:%.+]] = fcmp olt float %y, 0.000000e+00 +; CHECK: [[XLT0_AND_YGE0:%.+]] = and i1 [[X_LT_0]], [[Y_GE_0]] +; CHECK: [[SELECT_ADD_PI:%.+]] = select i1 [[XLT0_AND_YGE0]], float [[ADD_PI]], float [[ATAN]] +; CHECK: [[XLT0_AND_YLT0:%.+]] = and i1 [[X_LT_0]], [[Y_LT_0]] +; CHECK: [[SELECT_SUB_PI:%.+]] = select i1 [[XLT0_AND_YLT0]], float [[SUB_PI]], float [[SELECT_ADD_PI]] +; CHECK: [[XEQ0_AND_YLT0:%.+]] = and i1 [[X_EQ_0]], [[Y_LT_0]] +; CHECK: [[SELECT_NEGHPI:%.+]] = select i1 [[XEQ0_AND_YLT0]], float 0xBFF921FB60000000, float [[SELECT_SUB_PI]] +; CHECK: [[XEQ0_AND_YGE0:%.+]] = and i1 [[X_EQ_0]], [[Y_GE_0]] +; CHECK: [[SELECT_HPI:%.+]] = select i1 [[XEQ0_AND_YGE0]], float 0x3FF921FB60000000, float [[SELECT_NEGHPI]] +; CHECK: ret float [[SELECT_HPI]] + %elt.atan2 = call float @llvm.atan2.f32(float %y, float %x) + ret float %elt.atan2 +} + +define noundef half @atan2_half(half noundef %y, half noundef %x) { +entry: +; CHECK: [[DIV:%.+]] = fdiv half %y, %x +; EXPCHECK: [[ATAN:%.+]] = call half @llvm.atan.f16(half [[DIV]]) +; DOPCHECK: [[ATAN:%.+]] = call half @dx.op.unary.f16(i32 17, half [[DIV]]) +; CHECK-DAG: [[ADD_PI:%.+]] = fadd half [[ATAN]], 0xH4248 +; CHECK-DAG: [[SUB_PI:%.+]] = fsub half [[ATAN]], 0xH4248 +; CHECK-DAG: [[X_LT_0:%.+]] = fcmp olt half %x, 0xH0000 +; CHECK-DAG: [[X_EQ_0:%.+]] = fcmp oeq half %x, 0xH0000 +; CHECK-DAG: [[Y_GE_0:%.+]] = fcmp oge half %y, 0xH0000 +; CHECK-DAG: [[Y_LT_0:%.+]] = fcmp olt half %y, 0xH0000 +; CHECK: [[XLT0_AND_YGE0:%.+]] = and i1 [[X_LT_0]], [[Y_GE_0]] +; CHECK: [[SELECT_ADD_PI:%.+]] = select i1 [[XLT0_AND_YGE0]], half [[ADD_PI]], half [[ATAN]] +; CHECK: [[XLT0_AND_YLT0:%.+]] = and i1 [[X_LT_0]], [[Y_LT_0]] +; CHECK: [[SELECT_SUB_PI:%.+]] = select i1 [[XLT0_AND_YLT0]], half [[SUB_PI]], half [[SELECT_ADD_PI]] +; CHECK: [[XEQ0_AND_YLT0:%.+]] = and i1 [[X_EQ_0]], [[Y_LT_0]] +; CHECK: [[SELECT_NEGHPI:%.+]] = select i1 [[XEQ0_AND_YLT0]], half 0xHBE48, half [[SELECT_SUB_PI]] +; CHECK: [[XEQ0_AND_YGE0:%.+]] = and i1 [[X_EQ_0]], [[Y_GE_0]] +; CHECK: [[SELECT_HPI:%.+]] = select i1 [[XEQ0_AND_YGE0]], half 0xH3E48, half [[SELECT_NEGHPI]] +; CHECK: ret half [[SELECT_HPI]] + %elt.atan2 = call half @llvm.atan2.f16(half %y, half %x) + ret half %elt.atan2 +} + +define noundef <4 x float> @atan2_float4(<4 x float> noundef %y, <4 x float> noundef %x) { +entry: +; Just Expansion, no scalarization or lowering: +; EXPCHECK: [[DIV:%.+]] = fdiv <4 x float> %y, %x +; EXPCHECK: [[ATAN:%.+]] = call <4 x float> @llvm.atan.v4f32(<4 x float> [[DIV]]) +; EXPCHECK-DAG: [[ADD_PI:%.+]] = fadd <4 x float> [[ATAN]], +; EXPCHECK-DAG: [[SUB_PI:%.+]] = fsub <4 x float> [[ATAN]], +; EXPCHECK-DAG: [[X_LT_0:%.+]] = fcmp olt <4 x float> %x, zeroinitializer +; EXPCHECK-DAG: [[X_EQ_0:%.+]] = fcmp oeq <4 x float> %x, zeroinitializer +; EXPCHECK-DAG: [[Y_GE_0:%.+]] = fcmp oge <4 x float> %y, zeroinitializer +; EXPCHECK-DAG: [[Y_LT_0:%.+]] = fcmp olt <4 x float> %y, zeroinitializer 
+; EXPCHECK: [[XLT0_AND_YGE0:%.+]] = and <4 x i1> [[X_LT_0]], [[Y_GE_0]]
+; EXPCHECK: [[SELECT_ADD_PI:%.+]] = select <4 x i1> [[XLT0_AND_YGE0]], <4 x float> [[ADD_PI]], <4 x float> [[ATAN]]
+; EXPCHECK: [[XLT0_AND_YLT0:%.+]] = and <4 x i1> [[X_LT_0]], [[Y_LT_0]]
+; EXPCHECK: [[SELECT_SUB_PI:%.+]] = select <4 x i1> [[XLT0_AND_YLT0]], <4 x float> [[SUB_PI]], <4 x float> [[SELECT_ADD_PI]]
+; EXPCHECK: [[XEQ0_AND_YLT0:%.+]] = and <4 x i1> [[X_EQ_0]], [[Y_LT_0]]
+; EXPCHECK: [[SELECT_NEGHPI:%.+]] = select <4 x i1> [[XEQ0_AND_YLT0]], <4 x float> , <4 x float> [[SELECT_SUB_PI]]
+; EXPCHECK: [[XEQ0_AND_YGE0:%.+]] = and <4 x i1> [[X_EQ_0]], [[Y_GE_0]]
+; EXPCHECK: [[SELECT_HPI:%.+]] = select <4 x i1> [[XEQ0_AND_YGE0]], <4 x float> , <4 x float> [[SELECT_NEGHPI]]
+; EXPCHECK: ret <4 x float> [[SELECT_HPI]]
+
+; Scalarization occurs after expansion, so atan scalarization is tested separately.
+; Expansion, scalarization and lowering:
+; Just make sure this expands to exactly 4 scalar DXIL atan (OpCode=17) calls.
+; DOPCHECK-COUNT-4: call float @dx.op.unary.f32(i32 17, float %{{.*}})
+; DOPCHECK-NOT: call float @dx.op.unary.f32(i32 17,
+
+  %elt.atan2 = call <4 x float> @llvm.atan2.v4f32(<4 x float> %y, <4 x float> %x)
+  ret <4 x float> %elt.atan2
+}
+
+declare half @llvm.atan2.f16(half, half)
+declare float @llvm.atan2.f32(float, float)
+declare <4 x float> @llvm.atan2.v4f32(<4 x float>, <4 x float>)
diff --git a/llvm/test/CodeGen/DirectX/atan2_error.ll b/llvm/test/CodeGen/DirectX/atan2_error.ll
new file mode 100644
index 0000000000000..5b3077f85f5d4
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/atan2_error.ll
@@ -0,0 +1,11 @@
+; RUN: not opt -S -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s 2>&1 | FileCheck %s
+
+; DXIL operation atan does not support double overload type
+; CHECK: in function atan2_double
+; CHECK-SAME: Cannot create ATan operation: Invalid overload type
+
+define noundef double @atan2_double(double noundef %a, double noundef %b) #0 {
+entry:
+  %1 = call double @llvm.atan2.f64(double %a, double %b)
+  ret double %1
+}

From 12285cca5ed713dfd483bd96422a5607b8af0085 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Daniel=20Rodr=C3=ADguez=20Troiti=C3=B1o?=
Date: Tue, 24 Sep 2024 10:55:35 -0700
Subject: [PATCH 029/212] [DWARF] Use ULEB128 and not just one byte for
 directory indices (#109067)

According to the standard, `DW_LNCT_directory_index` can be `data1`,
`data2`, or `udata` (see 6.2.4.1). The code was using `data1`, which
limits the number of directories to 256, even if the variable holding
the directory index is a `uint64_t`.

`dsymutil` was hitting an assertion when trying to write directory
indices higher than 255.

Modify the classic and the parallel DWARF linkers to use `udata`,
encoding the directory indices as ULEB128, and provide a test with more
than 256 directories to check that the changes work as expected.

For people who were using `dsymutil` with CUs that had 128 or more
directories, indices in the 128-255 range will now take two bytes
instead of one.
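As background for that size note: ULEB128 stores seven payload bits per byte
plus a continuation bit, so values 0-127 fit in one byte and anything from 128
up needs at least two. A minimal sketch of the encoding is below; it mirrors
what the llvm::encodeULEB128 and MCStreamer::emitULEB128IntValue helpers used
by this patch do, but the function name and standalone form are invented for
illustration.

#include <cstdint>
#include <vector>

// Minimal ULEB128 encoder (illustrative sketch, not the patch's code).
std::vector<uint8_t> uleb128(uint64_t Value) {
  std::vector<uint8_t> Bytes;
  do {
    uint8_t Byte = Value & 0x7f; // low seven payload bits
    Value >>= 7;
    if (Value != 0)
      Byte |= 0x80;              // continuation bit: more bytes follow
    Bytes.push_back(Byte);
  } while (Value != 0);
  return Bytes;
}

// uleb128(127) -> {0x7f}:       one byte, same size as DW_FORM_data1.
// uleb128(128) -> {0x80, 0x01}: two bytes, the extra cost mentioned above.
// uleb128(300) -> {0xac, 0x02}: representable, unlike with DW_FORM_data1.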
--- .../lib/DWARFLinker/Classic/DWARFStreamer.cpp | 5 +- .../Parallel/DebugLineSectionEmitter.h | 4 +- .../X86/dwarf5-many-include-directories.test | 213 ++++++++++++++++++ 3 files changed, 217 insertions(+), 5 deletions(-) create mode 100644 llvm/test/tools/dsymutil/X86/dwarf5-many-include-directories.test diff --git a/llvm/lib/DWARFLinker/Classic/DWARFStreamer.cpp b/llvm/lib/DWARFLinker/Classic/DWARFStreamer.cpp index bca3125368353..947db9cbcd92d 100644 --- a/llvm/lib/DWARFLinker/Classic/DWARFStreamer.cpp +++ b/llvm/lib/DWARFLinker/Classic/DWARFStreamer.cpp @@ -933,7 +933,7 @@ void DwarfStreamer::emitLineTablePrologueV5IncludeAndFileTable( LineSectionSize += MS->emitULEB128IntValue(StrForm); LineSectionSize += MS->emitULEB128IntValue(dwarf::DW_LNCT_directory_index); - LineSectionSize += MS->emitULEB128IntValue(dwarf::DW_FORM_data1); + LineSectionSize += MS->emitULEB128IntValue(dwarf::DW_FORM_udata); if (HasChecksums) { LineSectionSize += MS->emitULEB128IntValue(dwarf::DW_LNCT_MD5); @@ -952,8 +952,7 @@ void DwarfStreamer::emitLineTablePrologueV5IncludeAndFileTable( // file_names (sequence of file name entries). for (auto File : P.FileNames) { emitLineTableString(P, File.Name, DebugStrPool, DebugLineStrPool); - MS->emitInt8(File.DirIdx); - LineSectionSize += 1; + LineSectionSize += MS->emitULEB128IntValue(File.DirIdx); if (HasChecksums) { MS->emitBinaryData( StringRef(reinterpret_cast(File.Checksum.data()), diff --git a/llvm/lib/DWARFLinker/Parallel/DebugLineSectionEmitter.h b/llvm/lib/DWARFLinker/Parallel/DebugLineSectionEmitter.h index 38357c7f97314..b035c4b1d6c30 100644 --- a/llvm/lib/DWARFLinker/Parallel/DebugLineSectionEmitter.h +++ b/llvm/lib/DWARFLinker/Parallel/DebugLineSectionEmitter.h @@ -215,7 +215,7 @@ class DebugLineSectionEmitter { encodeULEB128(FileNameForm, Section.OS); encodeULEB128(dwarf::DW_LNCT_directory_index, Section.OS); - encodeULEB128(dwarf::DW_FORM_data1, Section.OS); + encodeULEB128(dwarf::DW_FORM_udata, Section.OS); if (HasChecksums) { encodeULEB128(dwarf::DW_LNCT_MD5, Section.OS); @@ -242,7 +242,7 @@ class DebugLineSectionEmitter { // A null-terminated string containing the full or relative path name of a // source file. 
Section.emitString(FileNameForm, *FileNameStr);
-    Section.emitIntVal(File.DirIdx, 1);
+    encodeULEB128(File.DirIdx, Section.OS);
 
     if (HasChecksums) {
       assert((File.Checksum.size() == 16) &&
diff --git a/llvm/test/tools/dsymutil/X86/dwarf5-many-include-directories.test b/llvm/test/tools/dsymutil/X86/dwarf5-many-include-directories.test
new file mode 100644
index 0000000000000..644eecd26d8af
--- /dev/null
+++ b/llvm/test/tools/dsymutil/X86/dwarf5-many-include-directories.test
@@ -0,0 +1,213 @@
+# RUN: rm -rf %t && mkdir -p %t
+# RUN: split-file %s %t
+# RUN: %python %t/all.py > %t/all.ll
+# RUN: sed 's@---TEMPORARY_DIR---@%{/t:regex_replacement}@' %t/debug.map.template > %t/debug.map
+# RUN: %llc_dwarf -mtriple x86_64-apple-macosx10.4.0 -o %t/all.o -filetype=obj %t/all.ll
+# RUN: dsymutil -f -y %t/debug.map -o - | llvm-dwarfdump -debug-line - | FileCheck %s
+# RUN: dsymutil --linker parallel -f -y %t/debug.map -o - | llvm-dwarfdump -debug-line - | tee %t/output.txt | FileCheck %s
+
+# CHECK: include_directories[255] = "/tmp/tmp.0HPkdttdoU/d254"
+# CHECK-NEXT: include_directories[256] = "/tmp/tmp.0HPkdttdoU/d255"
+# CHECK-NEXT: include_directories[257] = "/tmp/tmp.0HPkdttdoU/d256"
+
+# CHECK: dir_index: 255
+# CHECK: dir_index: 256
+# CHECK: dir_index: 257
+
+# Original file generated doing the following (fish shell):
+# - for cnt in (seq 0 256); mkdir -p d$cnt ; printf "void func$cnt() {}\n#define FUNC$cnt func$cnt()\n" >> d$cnt/f$cnt.c ; end
+# - for cnt in (seq 0 256); printf "#include \"f$cnt.c\"" >> all.c ; end
+# - printf "void all() {\n" >> all.c
+# - for cnt in (seq 0 256); printf "FUNC$cnt;\n" >> all.c ; end
+# - printf "}\n" >> all.c
+# - clang -target x86_64-apple-macos -S -emit-llvm -gdwarf-5 -o all.ll all.c (for cnt in (seq 0 256); echo "-Id$cnt"; end)
+# - Edit all.ll manually and change all DIFile so the directory in filename is
+#   moved into the directory field.
+# - Transformed into Python manually.
+ +#--- all.py +import math +import string + +PROLOGUE = string.Template("""\ +; ModuleID = 'all.c' +source_filename = "all.c" +target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.4.0" +""") + +FUNCTION = string.Template("""\ +; Function Attrs: noinline nounwind optnone uwtable +define void @func$idx() #0 !dbg !$dbg_reference_subprogram { + ret void, !dbg !$dbg_reference_location_ret +} +""") + +ALL_FUNCTION_PROLOGUE = string.Template("""\ +; Function Attrs: noinline nounwind optnone uwtable +define void @all() #0 !dbg !$dbg_reference_subprogram { +""") + +ALL_FUNCTION_CALL = string.Template("""\ + call void @func$idx(), !dbg !$dbg_reference_location_call +""") + +ALL_FUNCTION_EPILOGUE = string.Template("""\ + ret void, !dbg !$dbg_reference_location_ret +} +""") + +DWARF_PROLOGUE = string.Template("""\ +attributes #0 = { noinline nounwind optnone uwtable "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="core2" "target-features"="+cmov,+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+ssse3,+x87" "tune-cpu"="generic" } + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!2, !3, !4, !5, !6, !7} +!llvm.ident = !{!8} + +!0 = distinct !DICompileUnit(language: DW_LANG_C11, file: !1, producer: "clang version 18.1.6 (CentOS 18.1.6-3.el9)", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, splitDebugInlining: false, nameTableKind: Apple, sysroot: "/") +!1 = !DIFile(filename: "all.c", directory: "/tmp/tmp.0HPkdttdoU", checksumkind: CSK_MD5, checksum: "8b5068f097f0c272ddc808ed2d82cb12") +!2 = !{i32 7, !"Dwarf Version", i32 5} +!3 = !{i32 2, !"Debug Info Version", i32 3} +!4 = !{i32 1, !"wchar_size", i32 4} +!5 = !{i32 8, !"PIC Level", i32 2} +!6 = !{i32 7, !"uwtable", i32 2} +!7 = !{i32 7, !"frame-pointer", i32 2} +!8 = !{!"clang version 18.1.6 (CentOS 18.1.6-3.el9)"} +""") + +DWARF_FUNCTION_WITH_TYPE = string.Template("""\ +!$dbg_reference_subprogram = distinct !DISubprogram(name: "func$idx", scope: !$dbg_reference_file, file: !$dbg_reference_file, line: 1, type: !11, scopeLine: 1, spFlags: DISPFlagDefinition, unit: !0) +!$dbg_reference_file = !DIFile(filename: "f$idx.c", directory: "/tmp/tmp.0HPkdttdoU/d$idx", checksumkind: CSK_MD5, checksum: "01234567890123456789012345678901") +!11 = !DISubroutineType(types: !12) +!12 = !{null} +!$dbg_reference_location = !DILocation(line: 1, column: $column, scope: !$dbg_reference_subprogram) +""") + +DWARF_FUNCTION = string.Template("""\ +!$dbg_reference_subprogram = distinct !DISubprogram(name: "func$idx", scope: !$dbg_reference_file, file: !$dbg_reference_file, line: 1, type: !11, scopeLine: 1, spFlags: DISPFlagDefinition, unit: !0) +!$dbg_reference_file = !DIFile(filename: "f$idx.c", directory: "/tmp/tmp.0HPkdttdoU/d$idx", checksumkind: CSK_MD5, checksum: "01234567890123456789012345678901") +!$dbg_reference_location = !DILocation(line: 1, column: $column, scope: !$dbg_reference_subprogram) +""") + +DWARF_ALL_FUNCTION_PROLOGUE = string.Template("""\ +!$dbg_reference_subprogram = distinct !DISubprogram(name: "all", scope: !1, file: !1, line: $line_number, type: !11, scopeLine: $line_number, spFlags: DISPFlagDefinition, unit: !0) +""") + +DWARF_ALL_FUNCTION_LOCATION = string.Template("""\ +!$dbg_reference_location = !DILocation(line: $line_number, column: 1, scope: !$dbg_reference_subprogram) +""") + +NUM_FUNCS = 257 + +dbg_reference_subprogram = 9 +dbg_reference_file = 10 
+dbg_reference_location = 13 +column_base = 15 +functions = [] +dwarf_subprograms = [] + +first = True +for idx in range(NUM_FUNCS): + functions.append( + FUNCTION.substitute( + idx=idx, + dbg_reference_subprogram=dbg_reference_subprogram, + dbg_reference_location_ret=dbg_reference_location, + ) + ) + if first: + dwarf_subprograms.append( + DWARF_FUNCTION_WITH_TYPE.substitute( + idx=idx, + dbg_reference_subprogram=dbg_reference_subprogram, + dbg_reference_file=dbg_reference_file, + dbg_reference_location=dbg_reference_location, + column=column_base, + ) + ) + else: + dwarf_subprograms.append( + DWARF_FUNCTION.substitute( + idx=idx, + dbg_reference_subprogram=dbg_reference_subprogram, + dbg_reference_file=dbg_reference_file, + dbg_reference_location=dbg_reference_location, + column=column_base + math.floor(math.log10(idx)), + ) + ) + + dbg_reference_subprogram += 5 if first else 3 + dbg_reference_file += 5 if first else 3 + dbg_reference_location += 3 + first = False + +dbg_reference_location = dbg_reference_subprogram + 1 +line_number = 258 +all_function = [] +dwarf_all_subprogram = [] + +all_function.append( + ALL_FUNCTION_PROLOGUE.substitute( + dbg_reference_subprogram=dbg_reference_subprogram + ) +) +dwarf_all_subprogram.append( + DWARF_ALL_FUNCTION_PROLOGUE.substitute( + dbg_reference_subprogram=dbg_reference_subprogram, + line_number=line_number + ) +) +line_number += 1 + +for idx in range(NUM_FUNCS): + all_function.append( + ALL_FUNCTION_CALL.substitute( + idx=idx, + dbg_reference_location_call=dbg_reference_location, + ) + ) + dwarf_all_subprogram.append( + DWARF_ALL_FUNCTION_LOCATION.substitute( + dbg_reference_location=dbg_reference_location, + line_number=line_number, + dbg_reference_subprogram=dbg_reference_subprogram, + ) + ) + + dbg_reference_location += 1 + line_number += 1 + +all_function.append( + ALL_FUNCTION_EPILOGUE.substitute( + dbg_reference_location_ret=dbg_reference_location + ) +) +dwarf_all_subprogram.append( + DWARF_ALL_FUNCTION_LOCATION.substitute( + dbg_reference_location=dbg_reference_location, + line_number=line_number, + dbg_reference_subprogram=dbg_reference_subprogram, + ) +) + +print(PROLOGUE.substitute()) +for function in functions: + print(function) +for all_function_piece in all_function: + print(all_function_piece, end='') +print() +print(DWARF_PROLOGUE.substitute(), end='') +for dwarf_subprogram in dwarf_subprograms: + print(dwarf_subprogram, end='') +for dwarf_all_subprogram_piece in dwarf_all_subprogram: + print(dwarf_all_subprogram_piece, end='') +print() + +#--- debug.map.template +--- +triple: 'x86_64-apple-darwin' +objects: + - filename: ---TEMPORARY_DIR---/all.o + symbols: + - { sym: _all, objAddr: 0x0, binAddr: 0x0, size: 0x0 } +... 
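One note on the numbers in the CHECK lines of this test: all.py generates 257
directories, d0 through d256, and DWARF 5 reserves include_directories[0] for
the compilation directory, so d256 lands at index 257. That is how the test
observes dir_index values past the old 255 ceiling; the tiny check below
illustrates the arithmetic (names invented, not part of the patch).

#include <cassert>

int main() {
  const int NumGeneratedDirs = 257; // d0 .. d256, as emitted by all.py
  // DWARF 5 puts the compilation directory at include_directories[0],
  // so generated directory dN appears at include_directories[N + 1].
  const int LastDirIndex = (NumGeneratedDirs - 1) + 1;
  assert(LastDirIndex == 257); // matches "dir_index: 257" in the CHECK lines
  assert(LastDirIndex > 255);  // too large for DW_FORM_data1
  return 0;
}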
From 6dfeea3f865cff0c1e3ca19aa0e77f379809b2a9 Mon Sep 17 00:00:00 2001 From: Farzon Lotfi <1802579+farzonl@users.noreply.github.com> Date: Tue, 24 Sep 2024 14:11:14 -0400 Subject: [PATCH 030/212] Revert "[DirectX] Add atan2 intrinsic and expand for DXIL backend (p1)" (#109842) Reverts llvm/llvm-project#108865 Broke the Docs build --- llvm/docs/LangRef.rst | 37 -------- llvm/include/llvm/IR/Intrinsics.td | 1 - .../Target/DirectX/DXILIntrinsicExpansion.cpp | 52 ----------- llvm/test/CodeGen/DirectX/atan2.ll | 87 ------------------- llvm/test/CodeGen/DirectX/atan2_error.ll | 11 --- 5 files changed, 188 deletions(-) delete mode 100644 llvm/test/CodeGen/DirectX/atan2.ll delete mode 100644 llvm/test/CodeGen/DirectX/atan2_error.ll diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst index 41d1efab752fd..91c3e60bb0acb 100644 --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -15583,43 +15583,6 @@ trapping or setting ``errno``. When specified with the fast-math-flag 'afn', the result may be approximated using a less accurate calculation. -'``llvm.atan2.*``' Intrinsic -^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -Syntax: -""""""" - -This is an overloaded intrinsic. You can use ``llvm.atan2`` on any -floating-point or vector of floating-point type. Not all targets support -all types however. - -:: - - declare float @llvm.atan2.f32(float %X, float %Y) - declare double @llvm.atan2.f64(double %X, double %Y) - declare x86_fp80 @llvm.atan2.f80(x86_fp80 %X, x86_fp80 %Y) - declare fp128 @llvm.atan2.f128(fp128 %X, fp128 %Y) - declare ppc_fp128 @llvm.atan2.ppcf128(ppc_fp128 %X, ppc_fp128 %Y) - -Overview: -""""""""" - -The '``llvm.atan2.*``' intrinsics return the arctangent of the operand. - -Arguments: -"""""""""" - -The arguments and return value are floating-point numbers of the same type. - -Semantics: -"""""""""" - -Return the same value as a corresponding libm '``atan2``' function but without -trapping or setting ``errno``. - -When specified with the fast-math-flag 'afn', the result may be approximated -using a less accurate calculation. 
- '``llvm.sinh.*``' Intrinsic ^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td index 48d57907e6d0b..0a74a217a5f01 100644 --- a/llvm/include/llvm/IR/Intrinsics.td +++ b/llvm/include/llvm/IR/Intrinsics.td @@ -1016,7 +1016,6 @@ let IntrProperties = [IntrNoMem, IntrSpeculatable, IntrWillReturn] in { def int_asin : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>; def int_acos : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>; def int_atan : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>; - def int_atan2 : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, LLVMMatchType<0>]>; def int_sin : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>; def int_cos : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>; def int_tan : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>; diff --git a/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp b/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp index 926cbe97f24fd..dd73b895b14d3 100644 --- a/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp +++ b/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp @@ -36,7 +36,6 @@ using namespace llvm; static bool isIntrinsicExpansion(Function &F) { switch (F.getIntrinsicID()) { case Intrinsic::abs: - case Intrinsic::atan2: case Intrinsic::exp: case Intrinsic::log: case Intrinsic::log10: @@ -308,54 +307,6 @@ static Value *expandNormalizeIntrinsic(CallInst *Orig) { return Builder.CreateFMul(X, MultiplicandVec); } -static Value *expandAtan2Intrinsic(CallInst *Orig) { - Value *Y = Orig->getOperand(0); - Value *X = Orig->getOperand(1); - Type *Ty = X->getType(); - IRBuilder<> Builder(Orig); - Builder.setFastMathFlags(Orig->getFastMathFlags()); - - Value *Tan = Builder.CreateFDiv(Y, X); - - CallInst *Atan = - Builder.CreateIntrinsic(Ty, Intrinsic::atan, {Tan}, nullptr, "Elt.Atan"); - Atan->setTailCall(Orig->isTailCall()); - Atan->setAttributes(Orig->getAttributes()); - - // Modify atan result based on https://en.wikipedia.org/wiki/Atan2. - Constant *Pi = ConstantFP::get(Ty, llvm::numbers::pi); - Constant *HalfPi = ConstantFP::get(Ty, llvm::numbers::pi / 2); - Constant *NegHalfPi = ConstantFP::get(Ty, -llvm::numbers::pi / 2); - Constant *Zero = ConstantFP::get(Ty, 0); - Value *AtanAddPi = Builder.CreateFAdd(Atan, Pi); - Value *AtanSubPi = Builder.CreateFSub(Atan, Pi); - - // x > 0 -> atan. - Value *Result = Atan; - Value *XLt0 = Builder.CreateFCmpOLT(X, Zero); - Value *XEq0 = Builder.CreateFCmpOEQ(X, Zero); - Value *YGe0 = Builder.CreateFCmpOGE(Y, Zero); - Value *YLt0 = Builder.CreateFCmpOLT(Y, Zero); - - // x < 0, y >= 0 -> atan + pi. - Value *XLt0AndYGe0 = Builder.CreateAnd(XLt0, YGe0); - Result = Builder.CreateSelect(XLt0AndYGe0, AtanAddPi, Result); - - // x < 0, y < 0 -> atan - pi. 
- Value *XLt0AndYLt0 = Builder.CreateAnd(XLt0, YLt0); - Result = Builder.CreateSelect(XLt0AndYLt0, AtanSubPi, Result); - - // x == 0, y < 0 -> -pi/2 - Value *XEq0AndYLt0 = Builder.CreateAnd(XEq0, YLt0); - Result = Builder.CreateSelect(XEq0AndYLt0, NegHalfPi, Result); - - // x == 0, y > 0 -> pi/2 - Value *XEq0AndYGe0 = Builder.CreateAnd(XEq0, YGe0); - Result = Builder.CreateSelect(XEq0AndYGe0, HalfPi, Result); - - return Result; -} - static Value *expandPowIntrinsic(CallInst *Orig) { Value *X = Orig->getOperand(0); @@ -467,9 +418,6 @@ static bool expandIntrinsic(Function &F, CallInst *Orig) { case Intrinsic::abs: Result = expandAbs(Orig); break; - case Intrinsic::atan2: - Result = expandAtan2Intrinsic(Orig); - break; case Intrinsic::exp: Result = expandExpIntrinsic(Orig); break; diff --git a/llvm/test/CodeGen/DirectX/atan2.ll b/llvm/test/CodeGen/DirectX/atan2.ll deleted file mode 100644 index 9d86f87f3ed50..0000000000000 --- a/llvm/test/CodeGen/DirectX/atan2.ll +++ /dev/null @@ -1,87 +0,0 @@ -; RUN: opt -S -dxil-intrinsic-expansion -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s --check-prefixes=CHECK,EXPCHECK -; RUN: opt -S -dxil-intrinsic-expansion -scalarizer -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s --check-prefixes=CHECK,DOPCHECK - -; Make sure correct dxil expansions for atan2 are generated for float and half. - -define noundef float @atan2_float(float noundef %y, float noundef %x) { -entry: -; CHECK: [[DIV:%.+]] = fdiv float %y, %x -; EXPCHECK: [[ATAN:%.+]] = call float @llvm.atan.f32(float [[DIV]]) -; DOPCHECK: [[ATAN:%.+]] = call float @dx.op.unary.f32(i32 17, float [[DIV]]) -; CHECK-DAG: [[ADD_PI:%.+]] = fadd float [[ATAN]], 0x400921FB60000000 -; CHECK-DAG: [[SUB_PI:%.+]] = fsub float [[ATAN]], 0x400921FB60000000 -; CHECK-DAG: [[X_LT_0:%.+]] = fcmp olt float %x, 0.000000e+00 -; CHECK-DAG: [[X_EQ_0:%.+]] = fcmp oeq float %x, 0.000000e+00 -; CHECK-DAG: [[Y_GE_0:%.+]] = fcmp oge float %y, 0.000000e+00 -; CHECK-DAG: [[Y_LT_0:%.+]] = fcmp olt float %y, 0.000000e+00 -; CHECK: [[XLT0_AND_YGE0:%.+]] = and i1 [[X_LT_0]], [[Y_GE_0]] -; CHECK: [[SELECT_ADD_PI:%.+]] = select i1 [[XLT0_AND_YGE0]], float [[ADD_PI]], float [[ATAN]] -; CHECK: [[XLT0_AND_YLT0:%.+]] = and i1 [[X_LT_0]], [[Y_LT_0]] -; CHECK: [[SELECT_SUB_PI:%.+]] = select i1 [[XLT0_AND_YLT0]], float [[SUB_PI]], float [[SELECT_ADD_PI]] -; CHECK: [[XEQ0_AND_YLT0:%.+]] = and i1 [[X_EQ_0]], [[Y_LT_0]] -; CHECK: [[SELECT_NEGHPI:%.+]] = select i1 [[XEQ0_AND_YLT0]], float 0xBFF921FB60000000, float [[SELECT_SUB_PI]] -; CHECK: [[XEQ0_AND_YGE0:%.+]] = and i1 [[X_EQ_0]], [[Y_GE_0]] -; CHECK: [[SELECT_HPI:%.+]] = select i1 [[XEQ0_AND_YGE0]], float 0x3FF921FB60000000, float [[SELECT_NEGHPI]] -; CHECK: ret float [[SELECT_HPI]] - %elt.atan2 = call float @llvm.atan2.f32(float %y, float %x) - ret float %elt.atan2 -} - -define noundef half @atan2_half(half noundef %y, half noundef %x) { -entry: -; CHECK: [[DIV:%.+]] = fdiv half %y, %x -; EXPCHECK: [[ATAN:%.+]] = call half @llvm.atan.f16(half [[DIV]]) -; DOPCHECK: [[ATAN:%.+]] = call half @dx.op.unary.f16(i32 17, half [[DIV]]) -; CHECK-DAG: [[ADD_PI:%.+]] = fadd half [[ATAN]], 0xH4248 -; CHECK-DAG: [[SUB_PI:%.+]] = fsub half [[ATAN]], 0xH4248 -; CHECK-DAG: [[X_LT_0:%.+]] = fcmp olt half %x, 0xH0000 -; CHECK-DAG: [[X_EQ_0:%.+]] = fcmp oeq half %x, 0xH0000 -; CHECK-DAG: [[Y_GE_0:%.+]] = fcmp oge half %y, 0xH0000 -; CHECK-DAG: [[Y_LT_0:%.+]] = fcmp olt half %y, 0xH0000 -; CHECK: [[XLT0_AND_YGE0:%.+]] = and i1 [[X_LT_0]], [[Y_GE_0]] -; CHECK: [[SELECT_ADD_PI:%.+]] 
= select i1 [[XLT0_AND_YGE0]], half [[ADD_PI]], half [[ATAN]] -; CHECK: [[XLT0_AND_YLT0:%.+]] = and i1 [[X_LT_0]], [[Y_LT_0]] -; CHECK: [[SELECT_SUB_PI:%.+]] = select i1 [[XLT0_AND_YLT0]], half [[SUB_PI]], half [[SELECT_ADD_PI]] -; CHECK: [[XEQ0_AND_YLT0:%.+]] = and i1 [[X_EQ_0]], [[Y_LT_0]] -; CHECK: [[SELECT_NEGHPI:%.+]] = select i1 [[XEQ0_AND_YLT0]], half 0xHBE48, half [[SELECT_SUB_PI]] -; CHECK: [[XEQ0_AND_YGE0:%.+]] = and i1 [[X_EQ_0]], [[Y_GE_0]] -; CHECK: [[SELECT_HPI:%.+]] = select i1 [[XEQ0_AND_YGE0]], half 0xH3E48, half [[SELECT_NEGHPI]] -; CHECK: ret half [[SELECT_HPI]] - %elt.atan2 = call half @llvm.atan2.f16(half %y, half %x) - ret half %elt.atan2 -} - -define noundef <4 x float> @atan2_float4(<4 x float> noundef %y, <4 x float> noundef %x) { -entry: -; Just Expansion, no scalarization or lowering: -; EXPCHECK: [[DIV:%.+]] = fdiv <4 x float> %y, %x -; EXPCHECK: [[ATAN:%.+]] = call <4 x float> @llvm.atan.v4f32(<4 x float> [[DIV]]) -; EXPCHECK-DAG: [[ADD_PI:%.+]] = fadd <4 x float> [[ATAN]], -; EXPCHECK-DAG: [[SUB_PI:%.+]] = fsub <4 x float> [[ATAN]], -; EXPCHECK-DAG: [[X_LT_0:%.+]] = fcmp olt <4 x float> %x, zeroinitializer -; EXPCHECK-DAG: [[X_EQ_0:%.+]] = fcmp oeq <4 x float> %x, zeroinitializer -; EXPCHECK-DAG: [[Y_GE_0:%.+]] = fcmp oge <4 x float> %y, zeroinitializer -; EXPCHECK-DAG: [[Y_LT_0:%.+]] = fcmp olt <4 x float> %y, zeroinitializer -; EXPCHECK: [[XLT0_AND_YGE0:%.+]] = and <4 x i1> [[X_LT_0]], [[Y_GE_0]] -; EXPCHECK: [[SELECT_ADD_PI:%.+]] = select <4 x i1> [[XLT0_AND_YGE0]], <4 x float> [[ADD_PI]], <4 x float> [[ATAN]] -; EXPCHECK: [[XLT0_AND_YLT0:%.+]] = and <4 x i1> [[X_LT_0]], [[Y_LT_0]] -; EXPCHECK: [[SELECT_SUB_PI:%.+]] = select <4 x i1> [[XLT0_AND_YLT0]], <4 x float> [[SUB_PI]], <4 x float> [[SELECT_ADD_PI]] -; EXPCHECK: [[XEQ0_AND_YLT0:%.+]] = and <4 x i1> [[X_EQ_0]], [[Y_LT_0]] -; EXPCHECK: [[SELECT_NEGHPI:%.+]] = select <4 x i1> [[XEQ0_AND_YLT0]], <4 x float> , <4 x float> [[SELECT_SUB_PI]] -; EXPCHECK: [[XEQ0_AND_YGE0:%.+]] = and <4 x i1> [[X_EQ_0]], [[Y_GE_0]] -; EXPCHECK: [[SELECT_HPI:%.+]] = select <4 x i1> [[XEQ0_AND_YGE0]], <4 x float> , <4 x float> [[SELECT_NEGHPI]] -; EXPCHECK: ret <4 x float> [[SELECT_HPI]] - -; Scalarization occurs after expansion, so atan scalarization is tested separately. -; Expansion, scalarization and lowering: -; Just make sure this expands to exactly 4 scalar DXIL atan (OpCode=17) calls. 
-; DOPCHECK-COUNT-4: call float @dx.op.unary.f32(i32 17, float %{{.*}})
-; DOPCHECK-NOT: call float @dx.op.unary.f32(i32 17,
-
- %elt.atan2 = call <4 x float> @llvm.atan2.v4f32(<4 x float> %y, <4 x float> %x)
- ret <4 x float> %elt.atan2
-}
-
-declare half @llvm.atan2.f16(half, half)
-declare float @llvm.atan2.f32(float, float)
-declare <4 x float> @llvm.atan2.v4f32(<4 x float>, <4 x float>)
diff --git a/llvm/test/CodeGen/DirectX/atan2_error.ll b/llvm/test/CodeGen/DirectX/atan2_error.ll
deleted file mode 100644
index 5b3077f85f5d4..0000000000000
--- a/llvm/test/CodeGen/DirectX/atan2_error.ll
+++ /dev/null
@@ -1,11 +0,0 @@
-; RUN: not opt -S -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s 2>&1 | FileCheck %s
-
-; DXIL operation atan does not support double overload type
-; CHECK: in function atan2_double
-; CHECK-SAME: Cannot create ATan operation: Invalid overload type
-
-define noundef double @atan2_double(double noundef %a, double noundef %b) #0 {
-entry:
-  %1 = call double @llvm.atan2.f64(double %a, double %b)
-  ret double %1
-}

From 312f73765b29db468cd282130d1b519ed3eeae96 Mon Sep 17 00:00:00 2001
From: Sam Clegg
Date: Tue, 24 Sep 2024 11:14:07 -0700
Subject: [PATCH 031/212] [lld][WebAssembly] Don't report relocation error
 when linking with -r/--relocatable (#109822)

Follow-up to #104926.

We ran into issues on the emscripten waterfall where relocations against
`__dso_handle` were being reported as errors even though
`-r/--relocatable` was being used to generate object file output rather
than executable output.
---
 lld/test/wasm/unsupported-pic-relocations.s   | 6 +++++-
 lld/test/wasm/unsupported-pic-relocations64.s | 6 +++++-
 lld/wasm/Relocations.cpp                      | 10 +++++-----
 3 files changed, 15 insertions(+), 7 deletions(-)

diff --git a/lld/test/wasm/unsupported-pic-relocations.s b/lld/test/wasm/unsupported-pic-relocations.s
index ea32e8468cdb4..2f85afa02c88b 100644
--- a/lld/test/wasm/unsupported-pic-relocations.s
+++ b/lld/test/wasm/unsupported-pic-relocations.s
@@ -15,6 +15,10 @@
 # RUN: not wasm-ld --experimental-pic -shared %t.o -o /dev/null --unresolved-symbols=import-dynamic 2>&1 | \
 # RUN:   FileCheck %s
 
+## These errors should not be reported under -r/--relocatable (i.e. when
+## generating an object file)
+# RUN: wasm-ld --experimental-pic -r %t.o -o /dev/null
+
 .functype external_func () -> ()
 
 use_undefined_function:
@@ -23,7 +27,7 @@ use_undefined_function:
 # CHECK: error: {{.*}}.o: relocation R_WASM_TABLE_INDEX_REL_SLEB is not supported against an undefined symbol `external_func`
   drop
   end_function
-
+
 use_undefined_data:
 .functype use_undefined_data () -> ()
   i32.const external_data@MBREL
diff --git a/lld/test/wasm/unsupported-pic-relocations64.s b/lld/test/wasm/unsupported-pic-relocations64.s
index db9707b7fbac5..df885b8d75fbe 100644
--- a/lld/test/wasm/unsupported-pic-relocations64.s
+++ b/lld/test/wasm/unsupported-pic-relocations64.s
@@ -15,6 +15,10 @@
 # RUN: not wasm-ld -mwasm64 --experimental-pic -shared %t.o -o /dev/null --unresolved-symbols=import-dynamic 2>&1 | \
 # RUN:   FileCheck %s
 
+## These errors should not be reported under -r/--relocatable (i.e.
when +## generating an object file) +# RUN: wasm-ld -mwasm64 --experimental-pic -r %t.o -o /dev/null + .functype external_func () -> () use_undefined_function: @@ -23,7 +27,7 @@ use_undefined_function: # CHECK: error: {{.*}}.o: relocation R_WASM_TABLE_INDEX_REL_SLEB64 is not supported against an undefined symbol `external_func` drop end_function - + use_undefined_data: .functype use_undefined_data () -> () i64.const external_data@MBREL diff --git a/lld/wasm/Relocations.cpp b/lld/wasm/Relocations.cpp index 2dbfe33549471..45ad32701616a 100644 --- a/lld/wasm/Relocations.cpp +++ b/lld/wasm/Relocations.cpp @@ -173,7 +173,7 @@ void scanRelocations(InputChunk *chunk) { } } - if (sym->isUndefined()) { + if (!config->relocatable && sym->isUndefined()) { switch (reloc.Type) { case R_WASM_TABLE_INDEX_REL_SLEB: case R_WASM_TABLE_INDEX_REL_SLEB64: @@ -187,11 +187,11 @@ void scanRelocations(InputChunk *chunk) { toString(*sym) + "`"); break; } - } - if (sym->isUndefined() && !config->relocatable && !sym->isWeak()) { - // Report undefined symbols - reportUndefined(file, sym); + if (!sym->isWeak()) { + // Report undefined symbols + reportUndefined(file, sym); + } } } } From a977b9460f8f007a7dedd9197adc7d76bb95b0b6 Mon Sep 17 00:00:00 2001 From: David CARLIER Date: Tue, 24 Sep 2024 19:15:50 +0100 Subject: [PATCH 032/212] =?UTF-8?q?Reapply=20[compiler-rt]=20prctl=20inter?= =?UTF-8?q?ception=20update,=20SECCOMP=5FMODE=5FFILTER=20=E2=80=A6=20(#109?= =?UTF-8?q?834)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit …support. #107722 --- .../sanitizer_common_interceptors.inc | 10 ++++++++ .../sanitizer_platform_limits_posix.cpp | 24 ++++++++++--------- .../sanitizer_platform_limits_posix.h | 3 ++- .../TestCases/Linux/prctl.cpp | 10 ++++++++ 4 files changed, 35 insertions(+), 12 deletions(-) diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc b/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc index bf5bb43018efc..e3a329712ac5a 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc +++ b/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc @@ -1289,6 +1289,11 @@ INTERCEPTOR(int, prctl, int option, unsigned long arg2, unsigned long arg3, static const int PR_SCHED_CORE = 62; static const int PR_SCHED_CORE_GET = 0; static const int PR_GET_PDEATHSIG = 2; + static const int PR_SET_SECCOMP = 22; + +# if !SANITIZER_ANDROID + static const int SECCOMP_MODE_FILTER = 2; +# endif if (option == PR_SET_VMA && arg2 == 0UL) { char *name = (char *)arg5; COMMON_INTERCEPTOR_READ_RANGE(ctx, name, internal_strlen(name) + 1); @@ -1307,6 +1312,11 @@ INTERCEPTOR(int, prctl, int option, unsigned long arg2, unsigned long arg3, COMMON_INTERCEPTOR_WRITE_RANGE(ctx, (u64 *)(arg5), sizeof(u64)); } else if (res != -1 && option == PR_GET_PDEATHSIG) { COMMON_INTERCEPTOR_WRITE_RANGE(ctx, (u64 *)(arg2), sizeof(int)); +# if !SANITIZER_ANDROID + } else if (res != -1 && option == PR_SET_SECCOMP && + arg2 == SECCOMP_MODE_FILTER) { + COMMON_INTERCEPTOR_WRITE_RANGE(ctx, (u64 *)(arg3), struct_sock_fprog_sz); +# endif } return res; } diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_posix.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_posix.cpp index 6d61d276d77e3..5eeb2a89efa8c 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_posix.cpp +++ b/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_posix.cpp @@ -117,15 +117,16 @@ typedef struct user_fpregs 
elf_fpregset_t; #if SANITIZER_LINUX #if SANITIZER_GLIBC #include -#include -#include -#include -#include -#include -#if HAVE_RPC_XDR_H -# include -#endif -#include +# include +# include +# include +# include +# include +# include +# if HAVE_RPC_XDR_H +# include +# endif +# include #else #include #include @@ -531,9 +532,10 @@ unsigned struct_ElfW_Phdr_sz = sizeof(Elf_Phdr); unsigned struct_audio_buf_info_sz = sizeof(struct audio_buf_info); unsigned struct_ppp_stats_sz = sizeof(struct ppp_stats); -#endif // SANITIZER_GLIBC + unsigned struct_sock_fprog_sz = sizeof(struct sock_fprog); +# endif // SANITIZER_GLIBC -#if !SANITIZER_ANDROID && !SANITIZER_APPLE +# if !SANITIZER_ANDROID && !SANITIZER_APPLE unsigned struct_sioc_sg_req_sz = sizeof(struct sioc_sg_req); unsigned struct_sioc_vif_req_sz = sizeof(struct sioc_vif_req); #endif diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_posix.h b/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_posix.h index 34bfef1f7ef45..ca03841ccc198 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_posix.h +++ b/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_posix.h @@ -1050,7 +1050,8 @@ extern unsigned struct_serial_struct_sz; extern unsigned struct_sockaddr_ax25_sz; extern unsigned struct_unimapdesc_sz; extern unsigned struct_unimapinit_sz; -#endif // SANITIZER_LINUX && !SANITIZER_ANDROID +extern unsigned struct_sock_fprog_sz; +# endif // SANITIZER_LINUX && !SANITIZER_ANDROID extern const unsigned long __sanitizer_bufsiz; diff --git a/compiler-rt/test/sanitizer_common/TestCases/Linux/prctl.cpp b/compiler-rt/test/sanitizer_common/TestCases/Linux/prctl.cpp index cbff02d66efa7..dab1d1b48f868 100644 --- a/compiler-rt/test/sanitizer_common/TestCases/Linux/prctl.cpp +++ b/compiler-rt/test/sanitizer_common/TestCases/Linux/prctl.cpp @@ -4,6 +4,8 @@ #include #include +#include +#include #include #include #include @@ -78,5 +80,13 @@ int main() { } } + sock_filter f[] = {{.code = (BPF_LD | BPF_W | BPF_ABS), + .k = (uint32_t)(SKF_AD_OFF | SKF_AD_CPU)}, + {.code = (BPF_RET | BPF_A), .k = 0}}; + sock_fprog pr = {.len = 2, .filter = f}; + + res = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &pr); + assert(res == -1); + return 0; } From 6d3d5f30bd58af6d16d0b4b7d32dc3ead1e098ec Mon Sep 17 00:00:00 2001 From: Han-Kuan Chen Date: Wed, 25 Sep 2024 02:23:14 +0800 Subject: [PATCH 033/212] [SLP][REVEC] getWidenedType should be used instead of FixedVectorType::get. 
(#109843) reference: https://github.com/llvm/llvm-project/issues/109835 --- .../Transforms/Vectorize/SLPVectorizer.cpp | 4 +- .../SLPVectorizer/revec-fix-109835.ll | 70 +++++++++++++++++++ 2 files changed, 72 insertions(+), 2 deletions(-) create mode 100644 llvm/test/Transforms/SLPVectorizer/revec-fix-109835.ll diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 7e3dbe6260983..b79e964cdb1b6 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -9986,8 +9986,8 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis { } Cost += ::getShuffleCost( TTI, TTI::SK_InsertSubvector, - FixedVectorType::get(ScalarTy, CommonMask.size()), {}, CostKind, - Idx, FixedVectorType::get(ScalarTy, E->getVectorFactor())); + getWidenedType(ScalarTy, CommonMask.size()), {}, CostKind, Idx, + getWidenedType(ScalarTy, E->getVectorFactor())); if (!CommonMask.empty()) { std::iota(std::next(CommonMask.begin(), Idx), std::next(CommonMask.begin(), Idx + E->getVectorFactor()), diff --git a/llvm/test/Transforms/SLPVectorizer/revec-fix-109835.ll b/llvm/test/Transforms/SLPVectorizer/revec-fix-109835.ll new file mode 100644 index 0000000000000..965bfc7074c63 --- /dev/null +++ b/llvm/test/Transforms/SLPVectorizer/revec-fix-109835.ll @@ -0,0 +1,70 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -passes=slp-vectorizer -S -slp-revec %s | FileCheck %s + +@b = external dso_local local_unnamed_addr global i64, align 8 +@d = external dso_local local_unnamed_addr global i32, align 4 +@c = external dso_local local_unnamed_addr global i32, align 4 +@a = external dso_local local_unnamed_addr global i8, align 2 + +define void @e() { +; CHECK-LABEL: @e( +; CHECK-NEXT: vector.ph: +; CHECK-NEXT: [[C_PROMOTED5:%.*]] = load i32, ptr @c, align 4 +; CHECK-NEXT: [[A_PROMOTED7:%.*]] = load i8, ptr @a, align 2 +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i32> poison, i32 [[C_PROMOTED5]], i64 0 +; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i32> [[DOTSPLATINSERT]], <16 x i32> poison, <16 x i32> zeroinitializer +; CHECK-NEXT: [[TMP0:%.*]] = insertelement <16 x i8> , i8 [[A_PROMOTED7]], i64 0 +; CHECK-NEXT: [[TMP1:%.*]] = add <16 x i32> [[DOTSPLAT]], +; CHECK-NEXT: [[TMP2:%.*]] = add <16 x i32> [[DOTSPLAT]], +; CHECK-NEXT: [[TMP3:%.*]] = add <16 x i32> [[DOTSPLAT]], +; CHECK-NEXT: [[INDUCTION:%.*]] = add <16 x i32> [[DOTSPLAT]], +; CHECK-NEXT: [[TMP4:%.*]] = icmp ult <16 x i32> [[TMP1]], +; CHECK-NEXT: [[TMP5:%.*]] = icmp ult <16 x i32> [[TMP2]], +; CHECK-NEXT: [[TMP6:%.*]] = icmp ult <16 x i32> [[TMP3]], +; CHECK-NEXT: [[TMP7:%.*]] = icmp ult <16 x i32> [[INDUCTION]], +; CHECK-NEXT: [[TMP8:%.*]] = icmp eq <16 x i32> [[DOTSPLAT]], +; CHECK-NEXT: [[TMP9:%.*]] = or <16 x i1> [[TMP4]], [[TMP5]] +; CHECK-NEXT: [[TMP10:%.*]] = or <16 x i1> [[TMP9]], [[TMP6]] +; CHECK-NEXT: [[TMP11:%.*]] = or <16 x i1> [[TMP10]], [[TMP7]] +; CHECK-NEXT: [[TMP12:%.*]] = or <16 x i1> [[TMP11]], [[TMP8]] +; CHECK-NEXT: [[TMP13:%.*]] = zext <16 x i1> [[TMP12]] to <16 x i8> +; CHECK-NEXT: [[TMP14:%.*]] = or <16 x i8> [[TMP0]], [[TMP13]] +; CHECK-NEXT: [[TMP15:%.*]] = shufflevector <16 x i8> [[TMP14]], <16 x i8> , <16 x i32> +; CHECK-NEXT: [[TMP16:%.*]] = call i8 @llvm.vector.reduce.or.v16i8(<16 x i8> [[TMP15]]) +; CHECK-NEXT: [[TMP17:%.*]] = add i32 [[C_PROMOTED5]], 81 +; CHECK-NEXT: store i64 -1, ptr @b, align 8 +; CHECK-NEXT: store i32 9, ptr @d, align 4 +; CHECK-NEXT: store 
i32 [[TMP17]], ptr @c, align 4
; CHECK-NEXT:    store i8 [[TMP16]], ptr @a, align 2
; CHECK-NEXT:    ret void
;
vector.ph:
  %c.promoted5 = load i32, ptr @c, align 4
  %a.promoted7 = load i8, ptr @a, align 2
  %.splatinsert = insertelement <16 x i32> poison, i32 %c.promoted5, i64 0
  %.splat = shufflevector <16 x i32> %.splatinsert, <16 x i32> poison, <16 x i32> zeroinitializer
  %0 = insertelement <16 x i8> , i8 %a.promoted7, i64 0
  %1 = add <16 x i32> %.splat, 
  %2 = add <16 x i32> %.splat, 
  %3 = add <16 x i32> %.splat, 
  %induction = add <16 x i32> %.splat, 
  %4 = icmp ult <16 x i32> %1, 
  %5 = icmp ult <16 x i32> %2, 
  %6 = icmp ult <16 x i32> %3, 
  %7 = icmp ult <16 x i32> %induction, 
  %8 = icmp eq <16 x i32> %.splat, 
  %9 = or <16 x i1> %4, %5
  %10 = or <16 x i1> %9, %6
  %11 = or <16 x i1> %10, %7
  %12 = or <16 x i1> %11, %8
  %13 = zext <16 x i1> %12 to <16 x i8>
  %14 = or <16 x i8> %0, %13
  %15 = shufflevector <16 x i8> %14, <16 x i8> , <16 x i32> 
  %16 = call i8 @llvm.vector.reduce.or.v16i8(<16 x i8> %15)
  %17 = add i32 %c.promoted5, 81
  store i64 -1, ptr @b, align 8
  store i32 9, ptr @d, align 4
  store i32 %17, ptr @c, align 4
  store i8 %16, ptr @a, align 2
  ret void
}

From 71ca9fcb8dc9ea0e1e3a4a47820edc78c398a85e Mon Sep 17 00:00:00 2001
From: Matt Arsenault
Date: Tue, 24 Sep 2024 22:32:53 +0400
Subject: [PATCH 034/212] llvm-reduce: Don't print verifier failed machine
 functions (#109673)

This produces far too much terminal output, particularly for the
instruction reduction. Since it doesn't consider the liveness of the
instructions it's deleting, it produces quite a lot of verifier errors.
---
 llvm/include/llvm/CodeGen/MachineFunction.h |   5 +-
 llvm/lib/CodeGen/MIRParser/MIRParser.cpp    |   2 +-
 llvm/lib/CodeGen/MachineBlockPlacement.cpp  |   2 +-
 llvm/lib/CodeGen/MachineScheduler.cpp       |   8 +-
 llvm/lib/CodeGen/MachineVerifier.cpp        | 232 ++++++++++----------
 llvm/lib/CodeGen/RegAllocGreedy.cpp         |  10 +-
 llvm/lib/CodeGen/RegisterCoalescer.cpp      |   4 +-
 llvm/tools/llvm-reduce/ReducerWorkItem.cpp  |  22 +-
 llvm/unittests/MI/LiveIntervalTest.cpp      |   4 +-
 9 files changed, 159 insertions(+), 130 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/MachineFunction.h b/llvm/include/llvm/CodeGen/MachineFunction.h
index aeb72ca24d79b..5c1da4fa762e8 100644
--- a/llvm/include/llvm/CodeGen/MachineFunction.h
+++ b/llvm/include/llvm/CodeGen/MachineFunction.h
@@ -897,13 +897,14 @@ class LLVM_ABI MachineFunction {
   /// for debugger use.
   /// \returns true if no problems were found.
   bool verify(Pass *p = nullptr, const char *Banner = nullptr,
-              bool AbortOnError = true) const;
+              raw_ostream *OS = nullptr, bool AbortOnError = true) const;
 
   /// Run the current MachineFunction through the machine code verifier, useful
   /// for debugger use.
   /// \returns true if no problems were found.
   bool verify(LiveIntervals *LiveInts, SlotIndexes *Indexes,
-              const char *Banner = nullptr, bool AbortOnError = true) const;
+              const char *Banner = nullptr, raw_ostream *OS = nullptr,
+              bool AbortOnError = true) const;
 
   // Provide accessors for the MachineBasicBlock list...
using iterator = BasicBlockListType::iterator; diff --git a/llvm/lib/CodeGen/MIRParser/MIRParser.cpp b/llvm/lib/CodeGen/MIRParser/MIRParser.cpp index 8d6d800d76147..3a00b8ec4771d 100644 --- a/llvm/lib/CodeGen/MIRParser/MIRParser.cpp +++ b/llvm/lib/CodeGen/MIRParser/MIRParser.cpp @@ -634,7 +634,7 @@ MIRParserImpl::initializeMachineFunction(const yaml::MachineFunction &YamlMF, MF.getSubtarget().mirFileLoaded(MF); - MF.verify(); + MF.verify(nullptr, nullptr, &errs()); return false; } diff --git a/llvm/lib/CodeGen/MachineBlockPlacement.cpp b/llvm/lib/CodeGen/MachineBlockPlacement.cpp index 7dcb287b38734..a52c82d77ca64 100644 --- a/llvm/lib/CodeGen/MachineBlockPlacement.cpp +++ b/llvm/lib/CodeGen/MachineBlockPlacement.cpp @@ -3698,7 +3698,7 @@ void MachineBlockPlacement::assignBlockOrder( #ifndef NDEBUG // Make sure we correctly constructed all branches. - F->verify(this, "After optimized block reordering"); + F->verify(this, "After optimized block reordering", &errs()); #endif } diff --git a/llvm/lib/CodeGen/MachineScheduler.cpp b/llvm/lib/CodeGen/MachineScheduler.cpp index 4e6d34346b1d8..9b2862de22b69 100644 --- a/llvm/lib/CodeGen/MachineScheduler.cpp +++ b/llvm/lib/CodeGen/MachineScheduler.cpp @@ -453,7 +453,7 @@ bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) { if (VerifyScheduling) { LLVM_DEBUG(LIS->dump()); - MF->verify(this, "Before machine scheduling."); + MF->verify(this, "Before machine scheduling.", &errs()); } RegClassInfo->runOnMachineFunction(*MF); @@ -472,7 +472,7 @@ bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) { LLVM_DEBUG(LIS->dump()); if (VerifyScheduling) - MF->verify(this, "After machine scheduling."); + MF->verify(this, "After machine scheduling.", &errs()); return true; } @@ -496,7 +496,7 @@ bool PostMachineScheduler::runOnMachineFunction(MachineFunction &mf) { AA = &getAnalysis().getAAResults(); if (VerifyScheduling) - MF->verify(this, "Before post machine scheduling."); + MF->verify(this, "Before post machine scheduling.", &errs()); // Instantiate the selected scheduler for this target, function, and // optimization level. @@ -512,7 +512,7 @@ bool PostMachineScheduler::runOnMachineFunction(MachineFunction &mf) { scheduleRegions(*Scheduler, true); if (VerifyScheduling) - MF->verify(this, "After post machine scheduling."); + MF->verify(this, "After post machine scheduling.", &errs()); return true; } diff --git a/llvm/lib/CodeGen/MachineVerifier.cpp b/llvm/lib/CodeGen/MachineVerifier.cpp index e1295ec8ea6e9..24a0f41775cc1 100644 --- a/llvm/lib/CodeGen/MachineVerifier.cpp +++ b/llvm/lib/CodeGen/MachineVerifier.cpp @@ -94,21 +94,24 @@ using namespace llvm; namespace { struct MachineVerifier { - MachineVerifier(MachineFunctionAnalysisManager &MFAM, const char *b) - : MFAM(&MFAM), Banner(b) {} + MachineVerifier(MachineFunctionAnalysisManager &MFAM, const char *b, + raw_ostream *OS) + : MFAM(&MFAM), OS(OS ? *OS : nulls()), Banner(b) {} - MachineVerifier(Pass *pass, const char *b) : PASS(pass), Banner(b) {} + MachineVerifier(Pass *pass, const char *b, raw_ostream *OS) + : PASS(pass), OS(OS ? *OS : nulls()), Banner(b) {} MachineVerifier(const char *b, LiveVariables *LiveVars, LiveIntervals *LiveInts, LiveStacks *LiveStks, - SlotIndexes *Indexes) - : Banner(b), LiveVars(LiveVars), LiveInts(LiveInts), LiveStks(LiveStks), - Indexes(Indexes) {} + SlotIndexes *Indexes, raw_ostream *OS) + : OS(OS ? 
*OS : nulls()), Banner(b), LiveVars(LiveVars), + LiveInts(LiveInts), LiveStks(LiveStks), Indexes(Indexes) {} unsigned verify(const MachineFunction &MF); MachineFunctionAnalysisManager *MFAM = nullptr; Pass *const PASS = nullptr; + raw_ostream &OS; const char *Banner; const MachineFunction *MF = nullptr; const TargetMachine *TM = nullptr; @@ -334,7 +337,8 @@ namespace { MachineFunctionProperties::Property::FailsVerification)) return false; - unsigned FoundErrors = MachineVerifier(this, Banner.c_str()).verify(MF); + unsigned FoundErrors = + MachineVerifier(this, Banner.c_str(), &errs()).verify(MF); if (FoundErrors) report_fatal_error("Found "+Twine(FoundErrors)+" machine code errors."); return false; @@ -352,7 +356,8 @@ MachineVerifierPass::run(MachineFunction &MF, if (MF.getProperties().hasProperty( MachineFunctionProperties::Property::FailsVerification)) return PreservedAnalyses::all(); - unsigned FoundErrors = MachineVerifier(MFAM, Banner.c_str()).verify(MF); + unsigned FoundErrors = + MachineVerifier(MFAM, Banner.c_str(), &errs()).verify(MF); if (FoundErrors) report_fatal_error("Found " + Twine(FoundErrors) + " machine code errors."); return PreservedAnalyses::all(); @@ -374,25 +379,28 @@ void llvm::verifyMachineFunction(const std::string &Banner, // LiveIntervals *LiveInts; // LiveStacks *LiveStks; // SlotIndexes *Indexes; - unsigned FoundErrors = MachineVerifier(nullptr, Banner.c_str()).verify(MF); + unsigned FoundErrors = + MachineVerifier(nullptr, Banner.c_str(), &errs()).verify(MF); if (FoundErrors) report_fatal_error("Found " + Twine(FoundErrors) + " machine code errors."); } -bool MachineFunction::verify(Pass *p, const char *Banner, bool AbortOnErrors) - const { +bool MachineFunction::verify(Pass *p, const char *Banner, raw_ostream *OS, + bool AbortOnErrors) const { MachineFunction &MF = const_cast(*this); - unsigned FoundErrors = MachineVerifier(p, Banner).verify(MF); + unsigned FoundErrors = MachineVerifier(p, Banner, OS).verify(MF); if (AbortOnErrors && FoundErrors) report_fatal_error("Found "+Twine(FoundErrors)+" machine code errors."); return FoundErrors == 0; } bool MachineFunction::verify(LiveIntervals *LiveInts, SlotIndexes *Indexes, - const char *Banner, bool AbortOnErrors) const { + const char *Banner, raw_ostream *OS, + bool AbortOnErrors) const { MachineFunction &MF = const_cast(*this); unsigned FoundErrors = - MachineVerifier(Banner, nullptr, LiveInts, nullptr, Indexes).verify(MF); + MachineVerifier(Banner, nullptr, LiveInts, nullptr, Indexes, OS) + .verify(MF); if (AbortOnErrors && FoundErrors) report_fatal_error("Found " + Twine(FoundErrors) + " machine code errors."); return FoundErrors == 0; @@ -482,7 +490,7 @@ unsigned MachineVerifier::verify(const MachineFunction &MF) { for (const MachineInstr &MI : MBB.instrs()) { if (MI.getParent() != &MBB) { report("Bad instruction parent pointer", &MBB); - errs() << "Instruction: " << MI; + OS << "Instruction: " << MI; continue; } @@ -540,46 +548,48 @@ unsigned MachineVerifier::verify(const MachineFunction &MF) { void MachineVerifier::report(const char *msg, const MachineFunction *MF) { assert(MF); - errs() << '\n'; + OS << '\n'; if (!foundErrors++) { if (Banner) - errs() << "# " << Banner << '\n'; + OS << "# " << Banner << '\n'; + if (LiveInts != nullptr) - LiveInts->print(errs()); + LiveInts->print(OS); else - MF->print(errs(), Indexes); + MF->print(OS, Indexes); } - errs() << "*** Bad machine code: " << msg << " ***\n" - << "- function: " << MF->getName() << "\n"; + + OS << "*** Bad machine code: " << msg << " ***\n" + << 
"- function: " << MF->getName() << '\n'; } void MachineVerifier::report(const char *msg, const MachineBasicBlock *MBB) { assert(MBB); report(msg, MBB->getParent()); - errs() << "- basic block: " << printMBBReference(*MBB) << ' ' - << MBB->getName() << " (" << (const void *)MBB << ')'; + OS << "- basic block: " << printMBBReference(*MBB) << ' ' << MBB->getName() + << " (" << (const void *)MBB << ')'; if (Indexes) - errs() << " [" << Indexes->getMBBStartIdx(MBB) - << ';' << Indexes->getMBBEndIdx(MBB) << ')'; - errs() << '\n'; + OS << " [" << Indexes->getMBBStartIdx(MBB) << ';' + << Indexes->getMBBEndIdx(MBB) << ')'; + OS << '\n'; } void MachineVerifier::report(const char *msg, const MachineInstr *MI) { assert(MI); report(msg, MI->getParent()); - errs() << "- instruction: "; + OS << "- instruction: "; if (Indexes && Indexes->hasIndex(*MI)) - errs() << Indexes->getInstructionIndex(*MI) << '\t'; - MI->print(errs(), /*IsStandalone=*/true); + OS << Indexes->getInstructionIndex(*MI) << '\t'; + MI->print(OS, /*IsStandalone=*/true); } void MachineVerifier::report(const char *msg, const MachineOperand *MO, unsigned MONum, LLT MOVRegType) { assert(MO); report(msg, MO->getParent()); - errs() << "- operand " << MONum << ": "; - MO->print(errs(), MOVRegType, TRI); - errs() << "\n"; + OS << "- operand " << MONum << ": "; + MO->print(OS, MOVRegType, TRI); + OS << '\n'; } void MachineVerifier::report(const Twine &Msg, const MachineInstr *MI) { @@ -587,11 +597,11 @@ void MachineVerifier::report(const Twine &Msg, const MachineInstr *MI) { } void MachineVerifier::report_context(SlotIndex Pos) const { - errs() << "- at: " << Pos << '\n'; + OS << "- at: " << Pos << '\n'; } void MachineVerifier::report_context(const LiveInterval &LI) const { - errs() << "- interval: " << LI << '\n'; + OS << "- interval: " << LI << '\n'; } void MachineVerifier::report_context(const LiveRange &LR, Register VRegUnit, @@ -603,35 +613,35 @@ void MachineVerifier::report_context(const LiveRange &LR, Register VRegUnit, } void MachineVerifier::report_context(const LiveRange::Segment &S) const { - errs() << "- segment: " << S << '\n'; + OS << "- segment: " << S << '\n'; } void MachineVerifier::report_context(const VNInfo &VNI) const { - errs() << "- ValNo: " << VNI.id << " (def " << VNI.def << ")\n"; + OS << "- ValNo: " << VNI.id << " (def " << VNI.def << ")\n"; } void MachineVerifier::report_context_liverange(const LiveRange &LR) const { - errs() << "- liverange: " << LR << '\n'; + OS << "- liverange: " << LR << '\n'; } void MachineVerifier::report_context(MCPhysReg PReg) const { - errs() << "- p. register: " << printReg(PReg, TRI) << '\n'; + OS << "- p. register: " << printReg(PReg, TRI) << '\n'; } void MachineVerifier::report_context_vreg(Register VReg) const { - errs() << "- v. register: " << printReg(VReg, TRI) << '\n'; + OS << "- v. 
register: " << printReg(VReg, TRI) << '\n'; } void MachineVerifier::report_context_vreg_regunit(Register VRegOrUnit) const { if (VRegOrUnit.isVirtual()) { report_context_vreg(VRegOrUnit); } else { - errs() << "- regunit: " << printRegUnit(VRegOrUnit, TRI) << '\n'; + OS << "- regunit: " << printRegUnit(VRegOrUnit, TRI) << '\n'; } } void MachineVerifier::report_context_lanemask(LaneBitmask LaneMask) const { - errs() << "- lanemask: " << PrintLaneMask(LaneMask) << '\n'; + OS << "- lanemask: " << PrintLaneMask(LaneMask) << '\n'; } void MachineVerifier::markReachable(const MachineBasicBlock *MBB) { @@ -710,8 +720,8 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) { report("MBB has successor that isn't part of the function.", MBB); if (!MBBInfoMap[succ].Preds.count(MBB)) { report("Inconsistent CFG", MBB); - errs() << "MBB is not in the predecessor list of the successor " - << printMBBReference(*succ) << ".\n"; + OS << "MBB is not in the predecessor list of the successor " + << printMBBReference(*succ) << ".\n"; } } @@ -721,8 +731,8 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) { report("MBB has predecessor that isn't part of the function.", MBB); if (!MBBInfoMap[Pred].Succs.count(MBB)) { report("Inconsistent CFG", MBB); - errs() << "MBB is not in the successor list of the predecessor " - << printMBBReference(*Pred) << ".\n"; + OS << "MBB is not in the successor list of the predecessor " + << printMBBReference(*Pred) << ".\n"; } } @@ -880,7 +890,7 @@ void MachineVerifier::visitMachineBundleBefore(const MachineInstr *MI) { SlotIndex idx = Indexes->getInstructionIndex(*MI); if (!(idx > lastIndex)) { report("Instruction index out of order", MI); - errs() << "Last instruction was at " << lastIndex << '\n'; + OS << "Last instruction was at " << lastIndex << '\n'; } lastIndex = idx; } @@ -894,7 +904,7 @@ void MachineVerifier::visitMachineBundleBefore(const MachineInstr *MI) { // precede non-terminators. if (FirstTerminator->getOpcode() != TargetOpcode::G_INVOKE_REGION_START) { report("Non-terminator instruction after the first terminator", MI); - errs() << "First terminator was:\t" << *FirstTerminator; + OS << "First terminator was:\t" << *FirstTerminator; } } } @@ -2185,8 +2195,8 @@ void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) { const MCInstrDesc &MCID = MI->getDesc(); if (MI->getNumOperands() < MCID.getNumOperands()) { report("Too few operands", MI); - errs() << MCID.getNumOperands() << " operands expected, but " - << MI->getNumOperands() << " given.\n"; + OS << MCID.getNumOperands() << " operands expected, but " + << MI->getNumOperands() << " given.\n"; } if (MI->getFlag(MachineInstr::NoConvergent) && !MCID.isConvergent()) @@ -2278,7 +2288,7 @@ void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) { // If both types are valid, check that the types are the same. 
if (SrcTy != DstTy) { report("Copy Instruction is illegal with mismatching types", MI); - errs() << "Def = " << DstTy << ", Src = " << SrcTy << "\n"; + OS << "Def = " << DstTy << ", Src = " << SrcTy << '\n'; } break; @@ -2322,8 +2332,7 @@ void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) { if (SrcSize.isNonZero() && DstSize.isNonZero() && SrcSize != DstSize) { if (!DstOp.getSubReg() && !SrcOp.getSubReg()) { report("Copy Instruction is illegal with mismatching sizes", MI); - errs() << "Def Size = " << DstSize << ", Src Size = " << SrcSize - << "\n"; + OS << "Def Size = " << DstSize << ", Src Size = " << SrcSize << '\n'; } } break; @@ -2554,8 +2563,8 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) { TII->getRegClass(MCID, MONum, TRI, *MF)) { if (!DRC->contains(Reg)) { report("Illegal physical register for instruction", MO, MONum); - errs() << printReg(Reg, TRI) << " is not a " - << TRI->getRegClassName(DRC) << " register.\n"; + OS << printReg(Reg, TRI) << " is not a " + << TRI->getRegClassName(DRC) << " register.\n"; } } } @@ -2618,9 +2627,9 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) { RBI->getMaximumSize(RegBank->getID()) < Ty.getSizeInBits()) { report("Register bank is too small for virtual register", MO, MONum); - errs() << "Register bank " << RegBank->getName() << " too small(" - << RBI->getMaximumSize(RegBank->getID()) << ") to fit " - << Ty.getSizeInBits() << "-bits\n"; + OS << "Register bank " << RegBank->getName() << " too small(" + << RBI->getMaximumSize(RegBank->getID()) << ") to fit " + << Ty.getSizeInBits() << "-bits\n"; return; } } @@ -2639,10 +2648,9 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) { TII->getRegClass(MCID, MONum, TRI, *MF)) { report("Virtual register does not match instruction constraint", MO, MONum); - errs() << "Expect register class " - << TRI->getRegClassName( - TII->getRegClass(MCID, MONum, TRI, *MF)) - << " but got nothing\n"; + OS << "Expect register class " + << TRI->getRegClassName(TII->getRegClass(MCID, MONum, TRI, *MF)) + << " but got nothing\n"; return; } @@ -2653,14 +2661,14 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) { TRI->getSubClassWithSubReg(RC, SubIdx); if (!SRC) { report("Invalid subregister index for virtual register", MO, MONum); - errs() << "Register class " << TRI->getRegClassName(RC) - << " does not support subreg index " << SubIdx << "\n"; + OS << "Register class " << TRI->getRegClassName(RC) + << " does not support subreg index " << SubIdx << '\n'; return; } if (RC != SRC) { report("Invalid register class for subregister index", MO, MONum); - errs() << "Register class " << TRI->getRegClassName(RC) - << " does not fully support subreg index " << SubIdx << "\n"; + OS << "Register class " << TRI->getRegClassName(RC) + << " does not fully support subreg index " << SubIdx << '\n'; return; } } @@ -2682,9 +2690,9 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) { } if (!RC->hasSuperClassEq(DRC)) { report("Illegal virtual register for instruction", MO, MONum); - errs() << "Expected a " << TRI->getRegClassName(DRC) - << " register, but got a " << TRI->getRegClassName(RC) - << " register\n"; + OS << "Expected a " << TRI->getRegClassName(DRC) + << " register, but got a " << TRI->getRegClassName(RC) + << " register\n"; } } } @@ -2733,11 +2741,11 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) { } if (loads && 
!LI.liveAt(Idx.getRegSlot(true))) { report("Instruction loads from dead spill slot", MO, MONum); - errs() << "Live stack: " << LI << '\n'; + OS << "Live stack: " << LI << '\n'; } if (stores && !LI.liveAt(Idx.getRegSlot())) { report("Instruction stores to dead spill slot", MO, MONum); - errs() << "Live stack: " << LI << '\n'; + OS << "Live stack: " << LI << '\n'; } } break; @@ -3050,8 +3058,8 @@ MachineVerifier::visitMachineBasicBlockAfter(const MachineBasicBlock *MBB) { SlotIndex stop = Indexes->getMBBEndIdx(MBB); if (!(stop > lastIndex)) { report("Block ends before last instruction index", MBB); - errs() << "Block ends at " << stop - << " last instruction was at " << lastIndex << '\n'; + OS << "Block ends at " << stop << " last instruction was at " << lastIndex + << '\n'; } lastIndex = stop; } @@ -3296,8 +3304,8 @@ void MachineVerifier::checkPHIOps(const MachineBasicBlock &MBB) { for (MachineBasicBlock *Pred : MBB.predecessors()) { if (!seen.count(Pred)) { report("Missing PHI operand", &Phi); - errs() << printMBBReference(*Pred) - << " is a predecessor according to the CFG.\n"; + OS << printMBBReference(*Pred) + << " is a predecessor according to the CFG.\n"; } } } @@ -3306,9 +3314,10 @@ void MachineVerifier::checkPHIOps(const MachineBasicBlock &MBB) { static void verifyConvergenceControl(const MachineFunction &MF, MachineDominatorTree &DT, - std::function FailureCB) { + std::function FailureCB, + raw_ostream &OS) { MachineConvergenceVerifier CV; - CV.initialize(&errs(), FailureCB, MF); + CV.initialize(&OS, FailureCB, MF); for (const auto &MBB : MF) { CV.visit(MBB); @@ -3326,7 +3335,7 @@ void MachineVerifier::visitMachineFunctionAfter() { auto FailureCB = [this](const Twine &Message) { report(Message.str().c_str(), MF); }; - verifyConvergenceControl(*MF, DT, FailureCB); + verifyConvergenceControl(*MF, DT, FailureCB, OS); calcRegsPassed(); @@ -3342,8 +3351,8 @@ void MachineVerifier::visitMachineFunctionAfter() { for (Register VReg : MInfo.vregsRequired) if (MInfo.regsKilled.count(VReg)) { report("Virtual register killed in block, but needed live out.", &MBB); - errs() << "Virtual register " << printReg(VReg) - << " is used after the block.\n"; + OS << "Virtual register " << printReg(VReg) + << " is used after the block.\n"; } } @@ -3379,9 +3388,8 @@ void MachineVerifier::visitMachineFunctionAfter() { if (!PInfo.regsLiveOut.count(LiveInReg)) { report("Live in register not found to be live out from predecessor.", &MBB); - errs() << TRI->getName(LiveInReg) - << " not found to be live out from " - << printMBBReference(*Pred) << "\n"; + OS << TRI->getName(LiveInReg) << " not found to be live out from " + << printMBBReference(*Pred) << '\n'; } } } @@ -3418,14 +3426,14 @@ void MachineVerifier::verifyLiveVariables() { if (MInfo.vregsRequired.count(Reg)) { if (!VI.AliveBlocks.test(MBB.getNumber())) { report("LiveVariables: Block missing from AliveBlocks", &MBB); - errs() << "Virtual register " << printReg(Reg) - << " must be live through the block.\n"; + OS << "Virtual register " << printReg(Reg) + << " must be live through the block.\n"; } } else { if (VI.AliveBlocks.test(MBB.getNumber())) { report("LiveVariables: Block should not be in AliveBlocks", &MBB); - errs() << "Virtual register " << printReg(Reg) - << " is not needed live through the block.\n"; + OS << "Virtual register " << printReg(Reg) + << " is not needed live through the block.\n"; } } } @@ -3443,7 +3451,7 @@ void MachineVerifier::verifyLiveIntervals() { if (!LiveInts->hasInterval(Reg)) { report("Missing live interval for virtual 
register", MF); - errs() << printReg(Reg, TRI) << " still has defs or uses\n"; + OS << printReg(Reg, TRI) << " still has defs or uses\n"; continue; } @@ -3755,9 +3763,9 @@ void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR, report("Register not marked live out of predecessor", Pred); report_context(LR, Reg, LaneMask); report_context(*VNI); - errs() << " live into " << printMBBReference(*MFI) << '@' - << LiveInts->getMBBStartIdx(&*MFI) << ", not live before " - << PEnd << '\n'; + OS << " live into " << printMBBReference(*MFI) << '@' + << LiveInts->getMBBStartIdx(&*MFI) << ", not live before " << PEnd + << '\n'; continue; } @@ -3765,10 +3773,10 @@ void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR, if (!IsPHI && PVNI != VNI) { report("Different value live out of predecessor", Pred); report_context(LR, Reg, LaneMask); - errs() << "Valno #" << PVNI->id << " live out of " - << printMBBReference(*Pred) << '@' << PEnd << "\nValno #" - << VNI->id << " live into " << printMBBReference(*MFI) << '@' - << LiveInts->getMBBStartIdx(&*MFI) << '\n'; + OS << "Valno #" << PVNI->id << " live out of " + << printMBBReference(*Pred) << '@' << PEnd << "\nValno #" << VNI->id + << " live into " << printMBBReference(*MFI) << '@' + << LiveInts->getMBBStartIdx(&*MFI) << '\n'; } } if (&*MFI == EndMBB) @@ -3823,11 +3831,11 @@ void MachineVerifier::verifyLiveInterval(const LiveInterval &LI) { report("Multiple connected components in live interval", MF); report_context(LI); for (unsigned comp = 0; comp != NumComp; ++comp) { - errs() << comp << ": valnos"; + OS << comp << ": valnos"; for (const VNInfo *I : LI.valnos) if (comp == ConEQ.getEqClass(I)) - errs() << ' ' << I->id; - errs() << '\n'; + OS << ' ' << I->id; + OS << '\n'; } } } @@ -3889,9 +3897,9 @@ void MachineVerifier::verifyStackFrame() { report("Call frame size on entry does not match value computed from " "predecessor", MBB); - errs() << "Call frame size on entry " << MBB->getCallFrameSize() - << " does not match value computed from predecessor " - << -BBState.EntryValue << '\n'; + OS << "Call frame size on entry " << MBB->getCallFrameSize() + << " does not match value computed from predecessor " + << -BBState.EntryValue << '\n'; } // Update stack state by checking contents of MBB. 
@@ -3914,8 +3922,8 @@ void MachineVerifier::verifyStackFrame() { BBState.ExitValue; if (BBState.ExitIsSetup && AbsSPAdj != Size) { report("FrameDestroy is after FrameSetup ", &I); - errs() << "FrameDestroy <" << Size << "> is after FrameSetup <" - << AbsSPAdj << ">.\n"; + OS << "FrameDestroy <" << Size << "> is after FrameSetup <" + << AbsSPAdj << ">.\n"; } if (!MRI->isSSA() && !MF->getFrameInfo().adjustsStack()) report("AdjustsStack not set in presence of a frame pseudo " @@ -3933,11 +3941,11 @@ void MachineVerifier::verifyStackFrame() { (SPState[Pred->getNumber()].ExitValue != BBState.EntryValue || SPState[Pred->getNumber()].ExitIsSetup != BBState.EntryIsSetup)) { report("The exit stack state of a predecessor is inconsistent.", MBB); - errs() << "Predecessor " << printMBBReference(*Pred) - << " has exit state (" << SPState[Pred->getNumber()].ExitValue - << ", " << SPState[Pred->getNumber()].ExitIsSetup << "), while " - << printMBBReference(*MBB) << " has entry state (" - << BBState.EntryValue << ", " << BBState.EntryIsSetup << ").\n"; + OS << "Predecessor " << printMBBReference(*Pred) << " has exit state (" + << SPState[Pred->getNumber()].ExitValue << ", " + << SPState[Pred->getNumber()].ExitIsSetup << "), while " + << printMBBReference(*MBB) << " has entry state (" + << BBState.EntryValue << ", " << BBState.EntryIsSetup << ").\n"; } } @@ -3948,11 +3956,11 @@ void MachineVerifier::verifyStackFrame() { (SPState[Succ->getNumber()].EntryValue != BBState.ExitValue || SPState[Succ->getNumber()].EntryIsSetup != BBState.ExitIsSetup)) { report("The entry stack state of a successor is inconsistent.", MBB); - errs() << "Successor " << printMBBReference(*Succ) - << " has entry state (" << SPState[Succ->getNumber()].EntryValue - << ", " << SPState[Succ->getNumber()].EntryIsSetup << "), while " - << printMBBReference(*MBB) << " has exit state (" - << BBState.ExitValue << ", " << BBState.ExitIsSetup << ").\n"; + OS << "Successor " << printMBBReference(*Succ) << " has entry state (" + << SPState[Succ->getNumber()].EntryValue << ", " + << SPState[Succ->getNumber()].EntryIsSetup << "), while " + << printMBBReference(*MBB) << " has exit state (" + << BBState.ExitValue << ", " << BBState.ExitIsSetup << ").\n"; } } diff --git a/llvm/lib/CodeGen/RegAllocGreedy.cpp b/llvm/lib/CodeGen/RegAllocGreedy.cpp index 5001b4fec58f2..1ad70c86d68e3 100644 --- a/llvm/lib/CodeGen/RegAllocGreedy.cpp +++ b/llvm/lib/CodeGen/RegAllocGreedy.cpp @@ -1054,7 +1054,7 @@ void RAGreedy::splitAroundRegion(LiveRangeEdit &LREdit, } if (VerifyEnabled) - MF->verify(this, "After splitting live range around region"); + MF->verify(this, "After splitting live range around region", &errs()); } MCRegister RAGreedy::tryRegionSplit(const LiveInterval &VirtReg, @@ -1323,7 +1323,7 @@ unsigned RAGreedy::tryBlockSplit(const LiveInterval &VirtReg, } if (VerifyEnabled) - MF->verify(this, "After splitting live range around basic blocks"); + MF->verify(this, "After splitting live range around basic blocks", &errs()); return 0; } @@ -2507,7 +2507,7 @@ MCRegister RAGreedy::selectOrSplitImpl(const LiveInterval &VirtReg, DebugVars->splitRegister(VirtReg.reg(), LRE.regs(), *LIS); if (VerifyEnabled) - MF->verify(this, "After spilling"); + MF->verify(this, "After spilling", &errs()); } // The live virtual register requesting allocation was spilled, so tell @@ -2711,7 +2711,7 @@ bool RAGreedy::runOnMachineFunction(MachineFunction &mf) { TII = MF->getSubtarget().getInstrInfo(); if (VerifyEnabled) - MF->verify(this, "Before greedy register allocator"); + 
MF->verify(this, "Before greedy register allocator", &errs()); RegAllocBase::init(getAnalysis(), getAnalysis().getLIS(), @@ -2770,7 +2770,7 @@ bool RAGreedy::runOnMachineFunction(MachineFunction &mf) { tryHintsRecoloring(); if (VerifyEnabled) - MF->verify(this, "Before post optimization"); + MF->verify(this, "Before post optimization", &errs()); postOptimization(); reportStats(); diff --git a/llvm/lib/CodeGen/RegisterCoalescer.cpp b/llvm/lib/CodeGen/RegisterCoalescer.cpp index 99125200c1a4f..2e1f498c090d1 100644 --- a/llvm/lib/CodeGen/RegisterCoalescer.cpp +++ b/llvm/lib/CodeGen/RegisterCoalescer.cpp @@ -4239,7 +4239,7 @@ bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) { JoinSplitEdges = EnableJoinSplits; if (VerifyCoalescing) - MF->verify(this, "Before register coalescing"); + MF->verify(this, "Before register coalescing", &errs()); DbgVRegToValues.clear(); buildVRegToDbgValueMap(fn); @@ -4299,7 +4299,7 @@ bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) { LLVM_DEBUG(dump()); if (VerifyCoalescing) - MF->verify(this, "After register coalescing"); + MF->verify(this, "After register coalescing", &errs()); return true; } diff --git a/llvm/tools/llvm-reduce/ReducerWorkItem.cpp b/llvm/tools/llvm-reduce/ReducerWorkItem.cpp index 1510e9fb32007..5409b6dc7459d 100644 --- a/llvm/tools/llvm-reduce/ReducerWorkItem.cpp +++ b/llvm/tools/llvm-reduce/ReducerWorkItem.cpp @@ -52,6 +52,11 @@ extern cl::OptionCategory LLVMReduceOptions; static cl::opt TargetTriple("mtriple", cl::desc("Set the target triple"), cl::cat(LLVMReduceOptions)); +static cl::opt PrintInvalidMachineReductions( + "print-invalid-reduction-machine-verifier-errors", + cl::desc( + "Print machine verifier errors on invalid reduction attempts triple"), + cl::cat(LLVMReduceOptions)); static cl::opt TmpFilesAsBitcode( "write-tmp-files-as-bitcode", @@ -417,7 +422,7 @@ static std::unique_ptr cloneMF(MachineFunction *SrcMF, DstMRI->freezeReservedRegs(); - DstMF->verify(nullptr, "", /*AbortOnError=*/true); + DstMF->verify(nullptr, "", &errs(), /*AbortOnError=*/true); return DstMF; } @@ -450,8 +455,21 @@ bool ReducerWorkItem::verify(raw_fd_ostream *OS) const { for (const Function &F : getModule()) { if (const MachineFunction *MF = MMI->getMachineFunction(F)) { - if (!MF->verify(nullptr, "", /*AbortOnError=*/false)) + // With the current state of quality, most reduction attempts fail the + // machine verifier. Avoid spamming large function dumps on nearly every + // attempt until the situation is better. + if (!MF->verify(nullptr, "", + /*OS=*/PrintInvalidMachineReductions ? 
&errs() : nullptr, + /*AbortOnError=*/false)) { + + if (!PrintInvalidMachineReductions) { + WithColor::warning(errs()) + << "reduction attempt on function '" << MF->getName() + << "' failed machine verifier (debug with " + "-print-invalid-reduction-machine-verifier-errors)\n"; + } return true; + } } } diff --git a/llvm/unittests/MI/LiveIntervalTest.cpp b/llvm/unittests/MI/LiveIntervalTest.cpp index 7dcd82f3e7aa6..f910e8e1f2c8f 100644 --- a/llvm/unittests/MI/LiveIntervalTest.cpp +++ b/llvm/unittests/MI/LiveIntervalTest.cpp @@ -101,7 +101,9 @@ struct TestPassT : public TestPass { bool runOnMachineFunction(MachineFunction &MF) override { AnalysisType &A = getAnalysis(); T(MF, A); - EXPECT_EQ(MF.verify(this, /* Banner */ nullptr, /* AbortOnError */ false), + EXPECT_EQ(MF.verify(this, /* Banner=*/nullptr, + /*OS=*/nullptr, + /* AbortOnError=*/false), ShouldPass); return true; } From a6bdf3face377ee7ea84a02bada8a7e2ff380fe8 Mon Sep 17 00:00:00 2001 From: Charlie Barto Date: Tue, 24 Sep 2024 11:45:10 -0700 Subject: [PATCH 035/212] [asan][windows][tests] support MSVC compiler-id in asan tests (#109706) This follows up on https://github.com/llvm/llvm-project/pull/108255 to allow actually running the test suite with MSVC. Note, however, that MSVC can't yet build compiler-rt, most tests don't yet pass with msvc, and building and testing with different compilers is ill-supported. Follow ups will fix the first two issues, the third is future work. Note that `/Zi` is removed from the clang-cl command line, but lit as a whole adds `-gcodeview`, so there should still be debug info. --- compiler-rt/test/asan/lit.cfg.py | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/compiler-rt/test/asan/lit.cfg.py b/compiler-rt/test/asan/lit.cfg.py index 05ed7e8dd294e..dac3ef00a99af 100644 --- a/compiler-rt/test/asan/lit.cfg.py +++ b/compiler-rt/test/asan/lit.cfg.py @@ -153,12 +153,16 @@ def build_invocation(compile_flags, with_lto=False): if platform.system() == "Windows": # MSVC-specific tests might also use the clang-cl.exe driver. if target_is_msvc: - clang_cl_cxxflags = [ - "-Wno-deprecated-declarations", - "-WX", - "-D_HAS_EXCEPTIONS=0", - "-Zi", - ] + target_cflags + clang_cl_cxxflags = ( + [ + "-WX", + "-D_HAS_EXCEPTIONS=0", + ] + + config.debug_info_flags + + target_cflags + ) + if config.compiler_id != "MSVC": + clang_cl_cxxflags = ["-Wno-deprecated-declarations"] + clang_cl_cxxflags clang_cl_asan_cxxflags = ["-fsanitize=address"] + clang_cl_cxxflags if config.asan_dynamic: clang_cl_asan_cxxflags.append("-MD") @@ -286,6 +290,12 @@ def build_invocation(compile_flags, with_lto=False): [config.compiler_rt_libdir, os.environ.get("PATH", "")] ) +# msvc needs to be instructed where the compiler-rt libraries are +if config.compiler_id == "MSVC": + config.environment["LIB"] = os.path.pathsep.join( + [config.compiler_rt_libdir, config.environment.get("LIB", "")] + ) + # Default test suffixes. config.suffixes = [".c", ".cpp"] From c95583f15f77a2f4fed4b520b2bcf7b442cbc4b3 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Tue, 24 Sep 2024 19:54:56 +0100 Subject: [PATCH 036/212] [VPlan] Add createPtrAdd helper (NFC). Preparation for https://github.com/llvm/llvm-project/pull/106431. 
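For illustration, the call-site simplification this enables (taken from the
VPlanTransforms.cpp hunk below; VPBuilder creates the VPInstruction at its
current insertion point, so the explicit insertAfter goes away):

  // Before: construct the recipe by hand and place it explicitly.
  auto *Recipe = new VPInstruction(VPInstruction::PtrAdd,
                                   {PtrIV->getStartValue(), Steps},
                                   PtrIV->getDebugLoc(), "next.gep");
  Recipe->insertAfter(Steps);

  // After: one builder call creates and inserts the recipe.
  VPValue *PtrAdd = Builder.createPtrAdd(PtrIV->getStartValue(), Steps,
                                         PtrIV->getDebugLoc(), "next.gep");

Like createICmp above it, the helper routes through createInstruction, so
naming and debug-location handling stay consistent across the VPInstruction
builders.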
--- llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h | 5 +++++ llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp | 8 +++----- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h index 034fdf4233de3..00eec0a6f7b14 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h +++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h @@ -220,6 +220,11 @@ class VPBuilder { new VPInstruction(Instruction::ICmp, Pred, A, B, DL, Name)); } + VPInstruction *createPtrAdd(VPValue *Ptr, VPValue *Offset, DebugLoc DL, + const Twine &Name = "") { + return createInstruction(VPInstruction::PtrAdd, {Ptr, Offset}, DL, Name); + } + VPDerivedIVRecipe *createDerivedIV(InductionDescriptor::InductionKind Kind, FPMathOperator *FPBinOp, VPValue *Start, VPCanonicalIVPHIRecipe *CanonicalIV, diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index b2893e8328722..3b37a1ec9560e 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -593,12 +593,10 @@ static void legalizeAndOptimizeInductions(VPlan &Plan) { Plan, InductionDescriptor::IK_IntInduction, Instruction::Add, nullptr, nullptr, StartV, StepV, Builder); - auto *Recipe = new VPInstruction(VPInstruction::PtrAdd, - {PtrIV->getStartValue(), Steps}, - PtrIV->getDebugLoc(), "next.gep"); + VPValue *PtrAdd = Builder.createPtrAdd(PtrIV->getStartValue(), Steps, + PtrIV->getDebugLoc(), "next.gep"); - Recipe->insertAfter(Steps); - PtrIV->replaceAllUsesWith(Recipe); + PtrIV->replaceAllUsesWith(PtrAdd); continue; } From 9830156f623c56062bf6df1b4c4b4bd8ab5bd57c Mon Sep 17 00:00:00 2001 From: Brox Chen Date: Tue, 24 Sep 2024 15:01:15 -0400 Subject: [PATCH 037/212] [AMDGPU][True16][MC] add true16 and fake16 test file for vop3 instructions (#109695) duplicating mc test, and updating proper flag for true16 and fake16 test file for vop3 instructions. 
This is preparing for the up-coming VOP3 true16 changes --- llvm/test/MC/AMDGPU/gfx11_asm_vop3-fake16.s | 6199 ++++++++++++++ llvm/test/MC/AMDGPU/gfx11_asm_vop3.s | 8 +- .../MC/AMDGPU/gfx11_asm_vop3_dpp16-fake16.s | 4695 +++++++++++ llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16.s | 8 +- .../MC/AMDGPU/gfx11_asm_vop3_dpp8-fake16.s | 2968 +++++++ llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8.s | 8 +- llvm/test/MC/AMDGPU/gfx12_asm_vop3-fake16.s | 7294 +++++++++++++++++ llvm/test/MC/AMDGPU/gfx12_asm_vop3.s | 8 +- .../MC/AMDGPU/gfx12_asm_vop3_dpp16-fake16.s | 5764 +++++++++++++ llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp16.s | 8 +- .../MC/AMDGPU/gfx12_asm_vop3_dpp8-fake16.s | 3814 +++++++++ llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp8.s | 8 +- .../Disassembler/AMDGPU/gfx11_dasm_vop3.txt | 232 +- .../AMDGPU/gfx11_dasm_vop3_dpp16.txt | 217 +- .../AMDGPU/gfx11_dasm_vop3_dpp8.txt | 37 +- .../Disassembler/AMDGPU/gfx12_dasm_vop3.txt | 258 +- .../AMDGPU/gfx12_dasm_vop3_dpp16.txt | 221 +- .../AMDGPU/gfx12_dasm_vop3_dpp8.txt | 37 +- 18 files changed, 31560 insertions(+), 224 deletions(-) create mode 100644 llvm/test/MC/AMDGPU/gfx11_asm_vop3-fake16.s create mode 100644 llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16-fake16.s create mode 100644 llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8-fake16.s create mode 100644 llvm/test/MC/AMDGPU/gfx12_asm_vop3-fake16.s create mode 100644 llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp16-fake16.s create mode 100644 llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp8-fake16.s diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop3-fake16.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop3-fake16.s new file mode 100644 index 0000000000000..d78673d933b7a --- /dev/null +++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop3-fake16.s @@ -0,0 +1,6199 @@ +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,-real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX11,W32 %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,-real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX11,W64 %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,-real-true16 %s 2>&1 | FileCheck --check-prefix=W32-ERR --implicit-check-not=error: %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,-real-true16 %s 2>&1 | FileCheck --check-prefix=W64-ERR --implicit-check-not=error: %s + +v_add3_u32 v5, v1, v2, s3 +// GFX11: encoding: [0x05,0x00,0x55,0xd6,0x01,0x05,0x0e,0x00] + +v_add3_u32 v5, v255, s2, s105 +// GFX11: encoding: [0x05,0x00,0x55,0xd6,0xff,0x05,0xa4,0x01] + +v_add3_u32 v5, s1, v255, exec_hi +// GFX11: encoding: [0x05,0x00,0x55,0xd6,0x01,0xfe,0xff,0x01] + +v_add3_u32 v5, s105, s105, exec_lo +// GFX11: encoding: [0x05,0x00,0x55,0xd6,0x69,0xd2,0xf8,0x01] + +v_add3_u32 v5, vcc_lo, ttmp15, v3 +// GFX11: encoding: [0x05,0x00,0x55,0xd6,0x6a,0xf6,0x0c,0x04] + +v_add3_u32 v5, vcc_hi, 0xaf123456, v255 +// GFX11: encoding: [0x05,0x00,0x55,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] + +v_add3_u32 v5, ttmp15, src_scc, ttmp15 +// GFX11: encoding: [0x05,0x00,0x55,0xd6,0x7b,0xfa,0xed,0x01] + +v_add3_u32 v5, m0, 0.5, m0 +// GFX11: encoding: [0x05,0x00,0x55,0xd6,0x7d,0xe0,0xf5,0x01] + +v_add3_u32 v5, exec_lo, -1, vcc_hi +// GFX11: encoding: [0x05,0x00,0x55,0xd6,0x7e,0x82,0xad,0x01] + +v_add3_u32 v5, exec_hi, null, vcc_lo +// GFX11: encoding: [0x05,0x00,0x55,0xd6,0x7f,0xf8,0xa8,0x01] + +v_add3_u32 v5, null, exec_lo, 0xaf123456 +// GFX11: encoding: [0x05,0x00,0x55,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf] + +v_add3_u32 v5, -1, exec_hi, src_scc +// GFX11: encoding: 
[0x05,0x00,0x55,0xd6,0xc1,0xfe,0xf4,0x03] + +v_add3_u32 v5, 0.5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x55,0xd6,0xf0,0xfa,0xc0,0x03] + +v_add3_u32 v5, src_scc, vcc_lo, -1 +// GFX11: encoding: [0x05,0x00,0x55,0xd6,0xfd,0xd4,0x04,0x03] + +v_add3_u32 v255, 0xaf123456, vcc_hi, null +// GFX11: encoding: [0xff,0x00,0x55,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] + +v_add_co_u32 v5, s6, v1, v2 +// W32: encoding: [0x05,0x06,0x00,0xd7,0x01,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_u32 v5, s6, v255, v255 +// W32: encoding: [0x05,0x06,0x00,0xd7,0xff,0xff,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_u32 v5, s6, s1, s2 +// W32: encoding: [0x05,0x06,0x00,0xd7,0x01,0x04,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_u32 v5, s6, s105, s105 +// W32: encoding: [0x05,0x06,0x00,0xd7,0x69,0xd2,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_u32 v5, s6, vcc_lo, ttmp15 +// W32: encoding: [0x05,0x06,0x00,0xd7,0x6a,0xf6,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_u32 v5, s6, vcc_hi, 0xaf123456 +// W32: encoding: [0x05,0x06,0x00,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_u32 v5, s6, ttmp15, src_scc +// W32: encoding: [0x05,0x06,0x00,0xd7,0x7b,0xfa,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_u32 v5, s6, m0, 0.5 +// W32: encoding: [0x05,0x06,0x00,0xd7,0x7d,0xe0,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_u32 v5, s6, exec_lo, -1 +// W32: encoding: [0x05,0x06,0x00,0xd7,0x7e,0x82,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_u32 v5, s6, exec_hi, null +// W32: encoding: [0x05,0x06,0x00,0xd7,0x7f,0xf8,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_u32 v5, s105, null, exec_lo +// W32: encoding: [0x05,0x69,0x00,0xd7,0x7c,0xfc,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_u32 v5, vcc_lo, -1, exec_hi +// W32: encoding: [0x05,0x6a,0x00,0xd7,0xc1,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_u32 v5, vcc_hi, 0.5, m0 +// W32: encoding: [0x05,0x6b,0x00,0xd7,0xf0,0xfa,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_u32 v5, ttmp15, src_scc, vcc_lo +// W32: encoding: [0x05,0x7b,0x00,0xd7,0xfd,0xd4,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_u32 v5, s[12:13], v1, v2 +// W64: encoding: [0x05,0x0c,0x00,0xd7,0x01,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_u32 v5, s[12:13], v255, v255 +// W64: encoding: [0x05,0x0c,0x00,0xd7,0xff,0xff,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_u32 v5, s[12:13], s1, s2 +// W64: encoding: [0x05,0x0c,0x00,0xd7,0x01,0x04,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_u32 v5, s[12:13], s105, s105 +// W64: encoding: [0x05,0x0c,0x00,0xd7,0x69,0xd2,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + 
+v_add_co_u32 v5, s[12:13], vcc_lo, ttmp15 +// W64: encoding: [0x05,0x0c,0x00,0xd7,0x6a,0xf6,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_u32 v5, s[12:13], vcc_hi, 0xaf123456 +// W64: encoding: [0x05,0x0c,0x00,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_u32 v5, s[12:13], ttmp15, src_scc +// W64: encoding: [0x05,0x0c,0x00,0xd7,0x7b,0xfa,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_u32 v5, s[12:13], m0, 0.5 +// W64: encoding: [0x05,0x0c,0x00,0xd7,0x7d,0xe0,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_u32 v5, s[12:13], exec_lo, -1 +// W64: encoding: [0x05,0x0c,0x00,0xd7,0x7e,0x82,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_u32 v5, s[12:13], exec_hi, null +// W64: encoding: [0x05,0x0c,0x00,0xd7,0x7f,0xf8,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_u32 v5, s[12:13], null, exec_lo +// W64: encoding: [0x05,0x0c,0x00,0xd7,0x7c,0xfc,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_u32 v5, s[104:105], -1, exec_hi +// W64: encoding: [0x05,0x68,0x00,0xd7,0xc1,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_u32 v5, vcc, 0.5, m0 +// W64: encoding: [0x05,0x6a,0x00,0xd7,0xf0,0xfa,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_u32 v5, ttmp[14:15], src_scc, vcc_lo +// W64: encoding: [0x05,0x7a,0x00,0xd7,0xfd,0xd4,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_u32 v255, null, 0xaf123456, vcc_hi clamp +// GFX11: encoding: [0xff,0xfc,0x00,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] + +v_add_f64 v[5:6], v[1:2], v[2:3] +// GFX11: encoding: [0x05,0x00,0x27,0xd7,0x01,0x05,0x02,0x00] + +v_add_f64 v[5:6], v[254:255], v[254:255] +// GFX11: encoding: [0x05,0x00,0x27,0xd7,0xfe,0xfd,0x03,0x00] + +v_add_f64 v[5:6], s[2:3], s[4:5] +// GFX11: encoding: [0x05,0x00,0x27,0xd7,0x02,0x08,0x00,0x00] + +v_add_f64 v[5:6], s[104:105], s[104:105] +// GFX11: encoding: [0x05,0x00,0x27,0xd7,0x68,0xd0,0x00,0x00] + +v_add_f64 v[5:6], vcc, ttmp[14:15] +// GFX11: encoding: [0x05,0x00,0x27,0xd7,0x6a,0xf4,0x00,0x00] + +v_add_f64 v[5:6], ttmp[14:15], 0xaf123456 +// GFX11: encoding: [0x05,0x00,0x27,0xd7,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] + +v_add_f64 v[5:6], -|exec|, src_scc +// GFX11: encoding: [0x05,0x01,0x27,0xd7,0x7e,0xfa,0x01,0x20] + +v_add_f64 v[5:6], null, 0.5 +// GFX11: encoding: [0x05,0x00,0x27,0xd7,0x7c,0xe0,0x01,0x00] + +v_add_f64 v[5:6], -1, -1 +// GFX11: encoding: [0x05,0x00,0x27,0xd7,0xc1,0x82,0x01,0x00] + +v_add_f64 v[5:6], 0.5, null mul:2 +// GFX11: encoding: [0x05,0x00,0x27,0xd7,0xf0,0xf8,0x00,0x08] + +v_add_f64 v[5:6], -|src_scc|, -|exec| mul:4 +// GFX11: encoding: [0x05,0x03,0x27,0xd7,0xfd,0xfc,0x00,0x70] + +v_add_f64 v[254:255], 0xaf123456, -|vcc| clamp div:2 +// GFX11: encoding: [0xfe,0x82,0x27,0xd7,0xff,0xd4,0x00,0x58,0x56,0x34,0x12,0xaf] + +v_add_lshl_u32 v5, v1, v2, s3 +// GFX11: encoding: [0x05,0x00,0x47,0xd6,0x01,0x05,0x0e,0x00] + +v_add_lshl_u32 v5, v255, s2, s105 +// GFX11: encoding: [0x05,0x00,0x47,0xd6,0xff,0x05,0xa4,0x01] + +v_add_lshl_u32 v5, s1, v255, exec_hi +// GFX11: encoding: [0x05,0x00,0x47,0xd6,0x01,0xfe,0xff,0x01] 
+ +v_add_lshl_u32 v5, s105, s105, exec_lo +// GFX11: encoding: [0x05,0x00,0x47,0xd6,0x69,0xd2,0xf8,0x01] + +v_add_lshl_u32 v5, vcc_lo, ttmp15, v3 +// GFX11: encoding: [0x05,0x00,0x47,0xd6,0x6a,0xf6,0x0c,0x04] + +v_add_lshl_u32 v5, vcc_hi, 0xaf123456, v255 +// GFX11: encoding: [0x05,0x00,0x47,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] + +v_add_lshl_u32 v5, ttmp15, src_scc, ttmp15 +// GFX11: encoding: [0x05,0x00,0x47,0xd6,0x7b,0xfa,0xed,0x01] + +v_add_lshl_u32 v5, m0, 0.5, m0 +// GFX11: encoding: [0x05,0x00,0x47,0xd6,0x7d,0xe0,0xf5,0x01] + +v_add_lshl_u32 v5, exec_lo, -1, vcc_hi +// GFX11: encoding: [0x05,0x00,0x47,0xd6,0x7e,0x82,0xad,0x01] + +v_add_lshl_u32 v5, exec_hi, null, vcc_lo +// GFX11: encoding: [0x05,0x00,0x47,0xd6,0x7f,0xf8,0xa8,0x01] + +v_add_lshl_u32 v5, null, exec_lo, 0xaf123456 +// GFX11: encoding: [0x05,0x00,0x47,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf] + +v_add_lshl_u32 v5, -1, exec_hi, src_scc +// GFX11: encoding: [0x05,0x00,0x47,0xd6,0xc1,0xfe,0xf4,0x03] + +v_add_lshl_u32 v5, 0.5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x47,0xd6,0xf0,0xfa,0xc0,0x03] + +v_add_lshl_u32 v5, src_scc, vcc_lo, -1 +// GFX11: encoding: [0x05,0x00,0x47,0xd6,0xfd,0xd4,0x04,0x03] + +v_add_lshl_u32 v255, 0xaf123456, vcc_hi, null +// GFX11: encoding: [0xff,0x00,0x47,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] + +v_add_nc_i16 v5, v1, v2 +// GFX11: encoding: [0x05,0x00,0x0d,0xd7,0x01,0x05,0x02,0x00] + +v_add_nc_i16 v5, v255, v255 +// GFX11: encoding: [0x05,0x00,0x0d,0xd7,0xff,0xff,0x03,0x00] + +v_add_nc_i16 v5, s1, s2 +// GFX11: encoding: [0x05,0x00,0x0d,0xd7,0x01,0x04,0x00,0x00] + +v_add_nc_i16 v5, s105, s105 +// GFX11: encoding: [0x05,0x00,0x0d,0xd7,0x69,0xd2,0x00,0x00] + +v_add_nc_i16 v5, vcc_lo, ttmp15 +// GFX11: encoding: [0x05,0x00,0x0d,0xd7,0x6a,0xf6,0x00,0x00] + +v_add_nc_i16 v5, vcc_hi, 0xfe0b +// GFX11: encoding: [0x05,0x00,0x0d,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] + +v_add_nc_i16 v5, ttmp15, src_scc +// GFX11: encoding: [0x05,0x00,0x0d,0xd7,0x7b,0xfa,0x01,0x00] + +v_add_nc_i16 v5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x0d,0xd7,0x7d,0xe0,0x01,0x00] + +v_add_nc_i16 v5, exec_lo, -1 +// GFX11: encoding: [0x05,0x00,0x0d,0xd7,0x7e,0x82,0x01,0x00] + +v_add_nc_i16 v5, exec_hi, null +// GFX11: encoding: [0x05,0x00,0x0d,0xd7,0x7f,0xf8,0x00,0x00] + +v_add_nc_i16 v5, null, exec_lo op_sel:[1,1,1] +// GFX11: encoding: [0x05,0x58,0x0d,0xd7,0x7c,0xfc,0x00,0x00] + +v_add_nc_i16 v5, -1, exec_hi op_sel:[0,0,0] +// GFX11: encoding: [0x05,0x00,0x0d,0xd7,0xc1,0xfe,0x00,0x00] + +v_add_nc_i16 v5, 0.5, m0 op_sel:[1,0,0] +// GFX11: encoding: [0x05,0x08,0x0d,0xd7,0xf0,0xfa,0x00,0x00] + +v_add_nc_i16 v5, src_scc, vcc_lo op_sel:[0,1,0] +// GFX11: encoding: [0x05,0x10,0x0d,0xd7,0xfd,0xd4,0x00,0x00] + +v_add_nc_i16 v255, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp +// GFX11: encoding: [0xff,0xc0,0x0d,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] + +v_add_nc_i32 v5, v1, v2 +// GFX11: encoding: [0x05,0x00,0x26,0xd7,0x01,0x05,0x02,0x00] + +v_add_nc_i32 v5, v255, v255 +// GFX11: encoding: [0x05,0x00,0x26,0xd7,0xff,0xff,0x03,0x00] + +v_add_nc_i32 v5, s1, s2 +// GFX11: encoding: [0x05,0x00,0x26,0xd7,0x01,0x04,0x00,0x00] + +v_add_nc_i32 v5, s105, s105 +// GFX11: encoding: [0x05,0x00,0x26,0xd7,0x69,0xd2,0x00,0x00] + +v_add_nc_i32 v5, vcc_lo, ttmp15 +// GFX11: encoding: [0x05,0x00,0x26,0xd7,0x6a,0xf6,0x00,0x00] + +v_add_nc_i32 v5, vcc_hi, 0xaf123456 +// GFX11: encoding: [0x05,0x00,0x26,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] + +v_add_nc_i32 v5, ttmp15, src_scc +// GFX11: encoding: 
[0x05,0x00,0x26,0xd7,0x7b,0xfa,0x01,0x00] + +v_add_nc_i32 v5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x26,0xd7,0x7d,0xe0,0x01,0x00] + +v_add_nc_i32 v5, exec_lo, -1 +// GFX11: encoding: [0x05,0x00,0x26,0xd7,0x7e,0x82,0x01,0x00] + +v_add_nc_i32 v5, exec_hi, null +// GFX11: encoding: [0x05,0x00,0x26,0xd7,0x7f,0xf8,0x00,0x00] + +v_add_nc_i32 v5, null, exec_lo +// GFX11: encoding: [0x05,0x00,0x26,0xd7,0x7c,0xfc,0x00,0x00] + +v_add_nc_i32 v5, -1, exec_hi +// GFX11: encoding: [0x05,0x00,0x26,0xd7,0xc1,0xfe,0x00,0x00] + +v_add_nc_i32 v5, 0.5, m0 +// GFX11: encoding: [0x05,0x00,0x26,0xd7,0xf0,0xfa,0x00,0x00] + +v_add_nc_i32 v5, src_scc, vcc_lo +// GFX11: encoding: [0x05,0x00,0x26,0xd7,0xfd,0xd4,0x00,0x00] + +v_add_nc_i32 v255, 0xaf123456, vcc_hi clamp +// GFX11: encoding: [0xff,0x80,0x26,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] + +v_add_nc_u16 v5, v1, v2 +// GFX11: encoding: [0x05,0x00,0x03,0xd7,0x01,0x05,0x02,0x00] + +v_add_nc_u16 v5, v255, v255 +// GFX11: encoding: [0x05,0x00,0x03,0xd7,0xff,0xff,0x03,0x00] + +v_add_nc_u16 v5, s1, s2 +// GFX11: encoding: [0x05,0x00,0x03,0xd7,0x01,0x04,0x00,0x00] + +v_add_nc_u16 v5, s105, s105 +// GFX11: encoding: [0x05,0x00,0x03,0xd7,0x69,0xd2,0x00,0x00] + +v_add_nc_u16 v5, vcc_lo, ttmp15 +// GFX11: encoding: [0x05,0x00,0x03,0xd7,0x6a,0xf6,0x00,0x00] + +v_add_nc_u16 v5, vcc_hi, 0xfe0b +// GFX11: encoding: [0x05,0x00,0x03,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] + +v_add_nc_u16 v5, ttmp15, src_scc +// GFX11: encoding: [0x05,0x00,0x03,0xd7,0x7b,0xfa,0x01,0x00] + +v_add_nc_u16 v5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x03,0xd7,0x7d,0xe0,0x01,0x00] + +v_add_nc_u16 v5, exec_lo, -1 +// GFX11: encoding: [0x05,0x00,0x03,0xd7,0x7e,0x82,0x01,0x00] + +v_add_nc_u16 v5, exec_hi, null +// GFX11: encoding: [0x05,0x00,0x03,0xd7,0x7f,0xf8,0x00,0x00] + +v_add_nc_u16 v5, null, exec_lo op_sel:[1,1,1] +// GFX11: encoding: [0x05,0x58,0x03,0xd7,0x7c,0xfc,0x00,0x00] + +v_add_nc_u16 v5, -1, exec_hi op_sel:[0,0,0] +// GFX11: encoding: [0x05,0x00,0x03,0xd7,0xc1,0xfe,0x00,0x00] + +v_add_nc_u16 v5, 0.5, m0 op_sel:[1,0,0] +// GFX11: encoding: [0x05,0x08,0x03,0xd7,0xf0,0xfa,0x00,0x00] + +v_add_nc_u16 v5, src_scc, vcc_lo op_sel:[0,1,0] +// GFX11: encoding: [0x05,0x10,0x03,0xd7,0xfd,0xd4,0x00,0x00] + +v_add_nc_u16 v255, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp +// GFX11: encoding: [0xff,0xc0,0x03,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] + +v_alignbit_b32 v5, v1, v2, s3 +// GFX11: encoding: [0x05,0x00,0x16,0xd6,0x01,0x05,0x0e,0x00] + +v_alignbit_b32 v5, v255, s2, s3 +// GFX11: encoding: [0x05,0x00,0x16,0xd6,0xff,0x05,0x0c,0x00] + +v_alignbit_b32 v5, s1, v255, s3 +// GFX11: encoding: [0x05,0x00,0x16,0xd6,0x01,0xfe,0x0f,0x00] + +v_alignbit_b32 v5, s105, s105, s105 +// GFX11: encoding: [0x05,0x00,0x16,0xd6,0x69,0xd2,0xa4,0x01] + +v_alignbit_b32 v5, vcc_lo, ttmp15, v3 +// GFX11: encoding: [0x05,0x00,0x16,0xd6,0x6a,0xf6,0x0c,0x04] + +v_alignbit_b32 v5, vcc_hi, 0xaf123456, v255 +// GFX11: encoding: [0x05,0x00,0x16,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] + +v_alignbit_b32 v5, ttmp15, src_scc, ttmp15 +// GFX11: encoding: [0x05,0x00,0x16,0xd6,0x7b,0xfa,0xed,0x01] + +v_alignbit_b32 v5, m0, 0.5, exec_lo +// GFX11: encoding: [0x05,0x00,0x16,0xd6,0x7d,0xe0,0xf9,0x01] + +v_alignbit_b32 v5, exec_lo, -1, m0 +// GFX11: encoding: [0x05,0x00,0x16,0xd6,0x7e,0x82,0xf5,0x01] + +v_alignbit_b32 v5, exec_hi, null, vcc_hi +// GFX11: encoding: [0x05,0x00,0x16,0xd6,0x7f,0xf8,0xac,0x01] + +v_alignbit_b32 v5, null, exec_lo, vcc_lo +// GFX11: encoding: [0x05,0x00,0x16,0xd6,0x7c,0xfc,0xa8,0x01] + 
+v_alignbit_b32 v5, -1, exec_hi, src_scc +// GFX11: encoding: [0x05,0x00,0x16,0xd6,0xc1,0xfe,0xf4,0x03] + +v_alignbit_b32 v5, 0.5, m0, exec_hi +// GFX11: encoding: [0x05,0x00,0x16,0xd6,0xf0,0xfa,0xfc,0x01] + +v_alignbit_b32 v5, src_scc, vcc_lo, -1 +// GFX11: encoding: [0x05,0x00,0x16,0xd6,0xfd,0xd4,0x04,0x03] + +v_alignbit_b32 v255, 0xaf123456, vcc_hi, null +// GFX11: encoding: [0xff,0x00,0x16,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] + +v_alignbyte_b32 v5, v1, v2, s3 +// GFX11: encoding: [0x05,0x00,0x17,0xd6,0x01,0x05,0x0e,0x00] + +v_alignbyte_b32 v5, v255, s2, s3 +// GFX11: encoding: [0x05,0x00,0x17,0xd6,0xff,0x05,0x0c,0x00] + +v_alignbyte_b32 v5, s1, v255, s3 +// GFX11: encoding: [0x05,0x00,0x17,0xd6,0x01,0xfe,0x0f,0x00] + +v_alignbyte_b32 v5, s105, s105, s105 +// GFX11: encoding: [0x05,0x00,0x17,0xd6,0x69,0xd2,0xa4,0x01] + +v_alignbyte_b32 v5, vcc_lo, ttmp15, v3 +// GFX11: encoding: [0x05,0x00,0x17,0xd6,0x6a,0xf6,0x0c,0x04] + +v_alignbyte_b32 v5, vcc_hi, 0xaf123456, v255 +// GFX11: encoding: [0x05,0x00,0x17,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] + +v_alignbyte_b32 v5, ttmp15, src_scc, ttmp15 +// GFX11: encoding: [0x05,0x00,0x17,0xd6,0x7b,0xfa,0xed,0x01] + +v_alignbyte_b32 v5, m0, 0.5, exec_lo +// GFX11: encoding: [0x05,0x00,0x17,0xd6,0x7d,0xe0,0xf9,0x01] + +v_alignbyte_b32 v5, exec_lo, -1, m0 +// GFX11: encoding: [0x05,0x00,0x17,0xd6,0x7e,0x82,0xf5,0x01] + +v_alignbyte_b32 v5, exec_hi, null, vcc_hi +// GFX11: encoding: [0x05,0x00,0x17,0xd6,0x7f,0xf8,0xac,0x01] + +v_alignbyte_b32 v5, null, exec_lo, vcc_lo +// GFX11: encoding: [0x05,0x00,0x17,0xd6,0x7c,0xfc,0xa8,0x01] + +v_alignbyte_b32 v5, -1, exec_hi, src_scc +// GFX11: encoding: [0x05,0x00,0x17,0xd6,0xc1,0xfe,0xf4,0x03] + +v_alignbyte_b32 v5, 0.5, m0, exec_hi +// GFX11: encoding: [0x05,0x00,0x17,0xd6,0xf0,0xfa,0xfc,0x01] + +v_alignbyte_b32 v5, src_scc, vcc_lo, -1 +// GFX11: encoding: [0x05,0x00,0x17,0xd6,0xfd,0xd4,0x04,0x03] + +v_alignbyte_b32 v255, 0xaf123456, vcc_hi, null +// GFX11: encoding: [0xff,0x00,0x17,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] + +v_and_b16 v5, v1, v2 +// GFX11: encoding: [0x05,0x00,0x62,0xd7,0x01,0x05,0x02,0x00] + +v_and_b16 v5, v255, v255 +// GFX11: encoding: [0x05,0x00,0x62,0xd7,0xff,0xff,0x03,0x00] + +v_and_b16 v5, s1, s2 +// GFX11: encoding: [0x05,0x00,0x62,0xd7,0x01,0x04,0x00,0x00] + +v_and_b16 v5, s105, s105 +// GFX11: encoding: [0x05,0x00,0x62,0xd7,0x69,0xd2,0x00,0x00] + +v_and_b16 v5, vcc_lo, ttmp15 +// GFX11: encoding: [0x05,0x00,0x62,0xd7,0x6a,0xf6,0x00,0x00] + +v_and_b16 v5, vcc_hi, 0xfe0b +// GFX11: encoding: [0x05,0x00,0x62,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] + +v_and_b16 v5, ttmp15, src_scc +// GFX11: encoding: [0x05,0x00,0x62,0xd7,0x7b,0xfa,0x01,0x00] + +v_and_b16 v5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x62,0xd7,0x7d,0xe0,0x01,0x00] + +v_and_b16 v5, exec_lo, -1 +// GFX11: encoding: [0x05,0x00,0x62,0xd7,0x7e,0x82,0x01,0x00] + +v_and_b16 v5, exec_hi, null +// GFX11: encoding: [0x05,0x00,0x62,0xd7,0x7f,0xf8,0x00,0x00] + +v_and_b16 v5, null, exec_lo +// GFX11: encoding: [0x05,0x00,0x62,0xd7,0x7c,0xfc,0x00,0x00] + +v_and_b16 v5, -1, exec_hi +// GFX11: encoding: [0x05,0x00,0x62,0xd7,0xc1,0xfe,0x00,0x00] + +v_and_b16 v5, 0.5, m0 +// GFX11: encoding: [0x05,0x00,0x62,0xd7,0xf0,0xfa,0x00,0x00] + +v_and_b16 v5, src_scc, vcc_lo +// GFX11: encoding: [0x05,0x00,0x62,0xd7,0xfd,0xd4,0x00,0x00] + +v_and_b16 v255, 0xfe0b, vcc_hi +// GFX11: encoding: [0xff,0x00,0x62,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] + +v_and_or_b32 v5, v1, v2, s3 +// GFX11: encoding: 
[0x05,0x00,0x57,0xd6,0x01,0x05,0x0e,0x00] + +v_and_or_b32 v5, v255, s2, s105 +// GFX11: encoding: [0x05,0x00,0x57,0xd6,0xff,0x05,0xa4,0x01] + +v_and_or_b32 v5, s1, v255, exec_hi +// GFX11: encoding: [0x05,0x00,0x57,0xd6,0x01,0xfe,0xff,0x01] + +v_and_or_b32 v5, s105, s105, exec_lo +// GFX11: encoding: [0x05,0x00,0x57,0xd6,0x69,0xd2,0xf8,0x01] + +v_and_or_b32 v5, vcc_lo, ttmp15, v3 +// GFX11: encoding: [0x05,0x00,0x57,0xd6,0x6a,0xf6,0x0c,0x04] + +v_and_or_b32 v5, vcc_hi, 0xaf123456, v255 +// GFX11: encoding: [0x05,0x00,0x57,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] + +v_and_or_b32 v5, ttmp15, src_scc, ttmp15 +// GFX11: encoding: [0x05,0x00,0x57,0xd6,0x7b,0xfa,0xed,0x01] + +v_and_or_b32 v5, m0, 0.5, m0 +// GFX11: encoding: [0x05,0x00,0x57,0xd6,0x7d,0xe0,0xf5,0x01] + +v_and_or_b32 v5, exec_lo, -1, vcc_hi +// GFX11: encoding: [0x05,0x00,0x57,0xd6,0x7e,0x82,0xad,0x01] + +v_and_or_b32 v5, exec_hi, null, vcc_lo +// GFX11: encoding: [0x05,0x00,0x57,0xd6,0x7f,0xf8,0xa8,0x01] + +v_and_or_b32 v5, null, exec_lo, 0xaf123456 +// GFX11: encoding: [0x05,0x00,0x57,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf] + +v_and_or_b32 v5, -1, exec_hi, src_scc +// GFX11: encoding: [0x05,0x00,0x57,0xd6,0xc1,0xfe,0xf4,0x03] + +v_and_or_b32 v5, 0.5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x57,0xd6,0xf0,0xfa,0xc0,0x03] + +v_and_or_b32 v5, src_scc, vcc_lo, -1 +// GFX11: encoding: [0x05,0x00,0x57,0xd6,0xfd,0xd4,0x04,0x03] + +v_and_or_b32 v255, 0xaf123456, vcc_hi, null +// GFX11: encoding: [0xff,0x00,0x57,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] + +v_ashrrev_i16 v5, v1, v2 +// GFX11: encoding: [0x05,0x00,0x3a,0xd7,0x01,0x05,0x02,0x00] + +v_ashrrev_i16 v5, v255, v255 +// GFX11: encoding: [0x05,0x00,0x3a,0xd7,0xff,0xff,0x03,0x00] + +v_ashrrev_i16 v5, s1, s2 +// GFX11: encoding: [0x05,0x00,0x3a,0xd7,0x01,0x04,0x00,0x00] + +v_ashrrev_i16 v5, s105, s105 +// GFX11: encoding: [0x05,0x00,0x3a,0xd7,0x69,0xd2,0x00,0x00] + +v_ashrrev_i16 v5, vcc_lo, ttmp15 +// GFX11: encoding: [0x05,0x00,0x3a,0xd7,0x6a,0xf6,0x00,0x00] + +v_ashrrev_i16 v5, vcc_hi, 0xfe0b +// GFX11: encoding: [0x05,0x00,0x3a,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] + +v_ashrrev_i16 v5, ttmp15, src_scc +// GFX11: encoding: [0x05,0x00,0x3a,0xd7,0x7b,0xfa,0x01,0x00] + +v_ashrrev_i16 v5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x3a,0xd7,0x7d,0xe0,0x01,0x00] + +v_ashrrev_i16 v5, exec_lo, -1 +// GFX11: encoding: [0x05,0x00,0x3a,0xd7,0x7e,0x82,0x01,0x00] + +v_ashrrev_i16 v5, exec_hi, null +// GFX11: encoding: [0x05,0x00,0x3a,0xd7,0x7f,0xf8,0x00,0x00] + +v_ashrrev_i16 v5, null, exec_lo +// GFX11: encoding: [0x05,0x00,0x3a,0xd7,0x7c,0xfc,0x00,0x00] + +v_ashrrev_i16 v5, -1, exec_hi +// GFX11: encoding: [0x05,0x00,0x3a,0xd7,0xc1,0xfe,0x00,0x00] + +v_ashrrev_i16 v5, 0.5, m0 +// GFX11: encoding: [0x05,0x00,0x3a,0xd7,0xf0,0xfa,0x00,0x00] + +v_ashrrev_i16 v5, src_scc, vcc_lo +// GFX11: encoding: [0x05,0x00,0x3a,0xd7,0xfd,0xd4,0x00,0x00] + +v_ashrrev_i16 v255, 0xfe0b, vcc_hi +// GFX11: encoding: [0xff,0x00,0x3a,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] + +v_ashrrev_i64 v[5:6], v1, vcc +// GFX11: encoding: [0x05,0x00,0x3e,0xd7,0x01,0xd5,0x00,0x00] + +v_ashrrev_i64 v[5:6], v255, exec +// GFX11: encoding: [0x05,0x00,0x3e,0xd7,0xff,0xfd,0x00,0x00] + +v_ashrrev_i64 v[5:6], exec_lo, v[2:3] +// GFX11: encoding: [0x05,0x00,0x3e,0xd7,0x7e,0x04,0x02,0x00] + +v_ashrrev_i64 v[5:6], exec_hi, v[254:255] +// GFX11: encoding: [0x05,0x00,0x3e,0xd7,0x7f,0xfc,0x03,0x00] + +v_ashrrev_i64 v[5:6], null, null +// GFX11: encoding: [0x05,0x00,0x3e,0xd7,0x7c,0xf8,0x00,0x00] + +v_ashrrev_i64 
v[5:6], -1, -1 +// GFX11: encoding: [0x05,0x00,0x3e,0xd7,0xc1,0x82,0x01,0x00] + +v_ashrrev_i64 v[5:6], 0.5, 0xaf123456 +// GFX11: encoding: [0x05,0x00,0x3e,0xd7,0xf0,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] + +v_ashrrev_i64 v[5:6], src_scc, src_scc +// GFX11: encoding: [0x05,0x00,0x3e,0xd7,0xfd,0xfa,0x01,0x00] + +v_ashrrev_i64 v[254:255], 0xaf123456, 0.5 +// GFX11: encoding: [0xfe,0x00,0x3e,0xd7,0xff,0xe0,0x01,0x00,0x56,0x34,0x12,0xaf] + +v_bcnt_u32_b32 v5, v1, v2 +// GFX11: encoding: [0x05,0x00,0x1e,0xd7,0x01,0x05,0x02,0x00] + +v_bcnt_u32_b32 v5, v255, v255 +// GFX11: encoding: [0x05,0x00,0x1e,0xd7,0xff,0xff,0x03,0x00] + +v_bcnt_u32_b32 v5, s1, s2 +// GFX11: encoding: [0x05,0x00,0x1e,0xd7,0x01,0x04,0x00,0x00] + +v_bcnt_u32_b32 v5, s105, s105 +// GFX11: encoding: [0x05,0x00,0x1e,0xd7,0x69,0xd2,0x00,0x00] + +v_bcnt_u32_b32 v5, vcc_lo, ttmp15 +// GFX11: encoding: [0x05,0x00,0x1e,0xd7,0x6a,0xf6,0x00,0x00] + +v_bcnt_u32_b32 v5, vcc_hi, 0xaf123456 +// GFX11: encoding: [0x05,0x00,0x1e,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] + +v_bcnt_u32_b32 v5, ttmp15, src_scc +// GFX11: encoding: [0x05,0x00,0x1e,0xd7,0x7b,0xfa,0x01,0x00] + +v_bcnt_u32_b32 v5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x1e,0xd7,0x7d,0xe0,0x01,0x00] + +v_bcnt_u32_b32 v5, exec_lo, -1 +// GFX11: encoding: [0x05,0x00,0x1e,0xd7,0x7e,0x82,0x01,0x00] + +v_bcnt_u32_b32 v5, exec_hi, null +// GFX11: encoding: [0x05,0x00,0x1e,0xd7,0x7f,0xf8,0x00,0x00] + +v_bcnt_u32_b32 v5, null, exec_lo +// GFX11: encoding: [0x05,0x00,0x1e,0xd7,0x7c,0xfc,0x00,0x00] + +v_bcnt_u32_b32 v5, -1, exec_hi +// GFX11: encoding: [0x05,0x00,0x1e,0xd7,0xc1,0xfe,0x00,0x00] + +v_bcnt_u32_b32 v5, 0.5, m0 +// GFX11: encoding: [0x05,0x00,0x1e,0xd7,0xf0,0xfa,0x00,0x00] + +v_bcnt_u32_b32 v5, src_scc, vcc_lo +// GFX11: encoding: [0x05,0x00,0x1e,0xd7,0xfd,0xd4,0x00,0x00] + +v_bcnt_u32_b32 v255, 0xaf123456, vcc_hi +// GFX11: encoding: [0xff,0x00,0x1e,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] + +v_bfe_i32 v5, v1, v2, s3 +// GFX11: encoding: [0x05,0x00,0x11,0xd6,0x01,0x05,0x0e,0x00] + +v_bfe_i32 v5, v255, s2, s105 +// GFX11: encoding: [0x05,0x00,0x11,0xd6,0xff,0x05,0xa4,0x01] + +v_bfe_i32 v5, s1, v255, exec_hi +// GFX11: encoding: [0x05,0x00,0x11,0xd6,0x01,0xfe,0xff,0x01] + +v_bfe_i32 v5, s105, s105, exec_lo +// GFX11: encoding: [0x05,0x00,0x11,0xd6,0x69,0xd2,0xf8,0x01] + +v_bfe_i32 v5, vcc_lo, ttmp15, v3 +// GFX11: encoding: [0x05,0x00,0x11,0xd6,0x6a,0xf6,0x0c,0x04] + +v_bfe_i32 v5, vcc_hi, 0xaf123456, v255 +// GFX11: encoding: [0x05,0x00,0x11,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] + +v_bfe_i32 v5, ttmp15, src_scc, ttmp15 +// GFX11: encoding: [0x05,0x00,0x11,0xd6,0x7b,0xfa,0xed,0x01] + +v_bfe_i32 v5, m0, 0.5, m0 +// GFX11: encoding: [0x05,0x00,0x11,0xd6,0x7d,0xe0,0xf5,0x01] + +v_bfe_i32 v5, exec_lo, -1, vcc_hi +// GFX11: encoding: [0x05,0x00,0x11,0xd6,0x7e,0x82,0xad,0x01] + +v_bfe_i32 v5, exec_hi, null, vcc_lo +// GFX11: encoding: [0x05,0x00,0x11,0xd6,0x7f,0xf8,0xa8,0x01] + +v_bfe_i32 v5, null, exec_lo, 0xaf123456 +// GFX11: encoding: [0x05,0x00,0x11,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf] + +v_bfe_i32 v5, -1, exec_hi, src_scc +// GFX11: encoding: [0x05,0x00,0x11,0xd6,0xc1,0xfe,0xf4,0x03] + +v_bfe_i32 v5, 0.5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x11,0xd6,0xf0,0xfa,0xc0,0x03] + +v_bfe_i32 v5, src_scc, vcc_lo, -1 +// GFX11: encoding: [0x05,0x00,0x11,0xd6,0xfd,0xd4,0x04,0x03] + +v_bfe_i32 v255, 0xaf123456, vcc_hi, null +// GFX11: encoding: [0xff,0x00,0x11,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] + +v_bfe_u32 v5, v1, v2, s3 +// GFX11: encoding: 
[0x05,0x00,0x10,0xd6,0x01,0x05,0x0e,0x00] + +v_bfe_u32 v5, v255, s2, s105 +// GFX11: encoding: [0x05,0x00,0x10,0xd6,0xff,0x05,0xa4,0x01] + +v_bfe_u32 v5, s1, v255, exec_hi +// GFX11: encoding: [0x05,0x00,0x10,0xd6,0x01,0xfe,0xff,0x01] + +v_bfe_u32 v5, s105, s105, exec_lo +// GFX11: encoding: [0x05,0x00,0x10,0xd6,0x69,0xd2,0xf8,0x01] + +v_bfe_u32 v5, vcc_lo, ttmp15, v3 +// GFX11: encoding: [0x05,0x00,0x10,0xd6,0x6a,0xf6,0x0c,0x04] + +v_bfe_u32 v5, vcc_hi, 0xaf123456, v255 +// GFX11: encoding: [0x05,0x00,0x10,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] + +v_bfe_u32 v5, ttmp15, src_scc, ttmp15 +// GFX11: encoding: [0x05,0x00,0x10,0xd6,0x7b,0xfa,0xed,0x01] + +v_bfe_u32 v5, m0, 0.5, m0 +// GFX11: encoding: [0x05,0x00,0x10,0xd6,0x7d,0xe0,0xf5,0x01] + +v_bfe_u32 v5, exec_lo, -1, vcc_hi +// GFX11: encoding: [0x05,0x00,0x10,0xd6,0x7e,0x82,0xad,0x01] + +v_bfe_u32 v5, exec_hi, null, vcc_lo +// GFX11: encoding: [0x05,0x00,0x10,0xd6,0x7f,0xf8,0xa8,0x01] + +v_bfe_u32 v5, null, exec_lo, 0xaf123456 +// GFX11: encoding: [0x05,0x00,0x10,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf] + +v_bfe_u32 v5, -1, exec_hi, src_scc +// GFX11: encoding: [0x05,0x00,0x10,0xd6,0xc1,0xfe,0xf4,0x03] + +v_bfe_u32 v5, 0.5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x10,0xd6,0xf0,0xfa,0xc0,0x03] + +v_bfe_u32 v5, src_scc, vcc_lo, -1 +// GFX11: encoding: [0x05,0x00,0x10,0xd6,0xfd,0xd4,0x04,0x03] + +v_bfe_u32 v255, 0xaf123456, vcc_hi, null +// GFX11: encoding: [0xff,0x00,0x10,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] + +v_bfi_b32 v5, v1, v2, s3 +// GFX11: encoding: [0x05,0x00,0x12,0xd6,0x01,0x05,0x0e,0x00] + +v_bfi_b32 v5, v255, s2, s105 +// GFX11: encoding: [0x05,0x00,0x12,0xd6,0xff,0x05,0xa4,0x01] + +v_bfi_b32 v5, s1, v255, exec_hi +// GFX11: encoding: [0x05,0x00,0x12,0xd6,0x01,0xfe,0xff,0x01] + +v_bfi_b32 v5, s105, s105, exec_lo +// GFX11: encoding: [0x05,0x00,0x12,0xd6,0x69,0xd2,0xf8,0x01] + +v_bfi_b32 v5, vcc_lo, ttmp15, v3 +// GFX11: encoding: [0x05,0x00,0x12,0xd6,0x6a,0xf6,0x0c,0x04] + +v_bfi_b32 v5, vcc_hi, 0xaf123456, v255 +// GFX11: encoding: [0x05,0x00,0x12,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] + +v_bfi_b32 v5, ttmp15, src_scc, ttmp15 +// GFX11: encoding: [0x05,0x00,0x12,0xd6,0x7b,0xfa,0xed,0x01] + +v_bfi_b32 v5, m0, 0.5, m0 +// GFX11: encoding: [0x05,0x00,0x12,0xd6,0x7d,0xe0,0xf5,0x01] + +v_bfi_b32 v5, exec_lo, -1, vcc_hi +// GFX11: encoding: [0x05,0x00,0x12,0xd6,0x7e,0x82,0xad,0x01] + +v_bfi_b32 v5, exec_hi, null, vcc_lo +// GFX11: encoding: [0x05,0x00,0x12,0xd6,0x7f,0xf8,0xa8,0x01] + +v_bfi_b32 v5, null, exec_lo, 0xaf123456 +// GFX11: encoding: [0x05,0x00,0x12,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf] + +v_bfi_b32 v5, -1, exec_hi, src_scc +// GFX11: encoding: [0x05,0x00,0x12,0xd6,0xc1,0xfe,0xf4,0x03] + +v_bfi_b32 v5, 0.5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x12,0xd6,0xf0,0xfa,0xc0,0x03] + +v_bfi_b32 v5, src_scc, vcc_lo, -1 +// GFX11: encoding: [0x05,0x00,0x12,0xd6,0xfd,0xd4,0x04,0x03] + +v_bfi_b32 v255, 0xaf123456, vcc_hi, null +// GFX11: encoding: [0xff,0x00,0x12,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] + +v_bfm_b32 v5, v1, v2 +// GFX11: encoding: [0x05,0x00,0x1d,0xd7,0x01,0x05,0x02,0x00] + +v_bfm_b32 v5, v255, v255 +// GFX11: encoding: [0x05,0x00,0x1d,0xd7,0xff,0xff,0x03,0x00] + +v_bfm_b32 v5, s1, s2 +// GFX11: encoding: [0x05,0x00,0x1d,0xd7,0x01,0x04,0x00,0x00] + +v_bfm_b32 v5, s105, s105 +// GFX11: encoding: [0x05,0x00,0x1d,0xd7,0x69,0xd2,0x00,0x00] + +v_bfm_b32 v5, vcc_lo, ttmp15 +// GFX11: encoding: [0x05,0x00,0x1d,0xd7,0x6a,0xf6,0x00,0x00] + +v_bfm_b32 v5, vcc_hi, 0xaf123456 +// GFX11: 
encoding: [0x05,0x00,0x1d,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+
+v_bfm_b32 v5, ttmp15, src_scc
+// GFX11: encoding: [0x05,0x00,0x1d,0xd7,0x7b,0xfa,0x01,0x00]
+
+v_bfm_b32 v5, m0, 0.5
+// GFX11: encoding: [0x05,0x00,0x1d,0xd7,0x7d,0xe0,0x01,0x00]
+
+v_bfm_b32 v5, exec_lo, -1
+// GFX11: encoding: [0x05,0x00,0x1d,0xd7,0x7e,0x82,0x01,0x00]
+
+v_bfm_b32 v5, exec_hi, null
+// GFX11: encoding: [0x05,0x00,0x1d,0xd7,0x7f,0xf8,0x00,0x00]
+
+v_bfm_b32 v5, null, exec_lo
+// GFX11: encoding: [0x05,0x00,0x1d,0xd7,0x7c,0xfc,0x00,0x00]
+
+v_bfm_b32 v5, -1, exec_hi
+// GFX11: encoding: [0x05,0x00,0x1d,0xd7,0xc1,0xfe,0x00,0x00]
+
+v_bfm_b32 v5, 0.5, m0
+// GFX11: encoding: [0x05,0x00,0x1d,0xd7,0xf0,0xfa,0x00,0x00]
+
+v_bfm_b32 v5, src_scc, vcc_lo
+// GFX11: encoding: [0x05,0x00,0x1d,0xd7,0xfd,0xd4,0x00,0x00]
+
+v_bfm_b32 v255, 0xaf123456, vcc_hi
+// GFX11: encoding: [0xff,0x00,0x1d,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf]
+
+v_cndmask_b16 v5, v1, src_scc, s3
+// W32: encoding: [0x05,0x00,0x5d,0xd6,0x01,0xfb,0x0d,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cndmask_b16 v5, v255, 0.5, s3
+// W32: encoding: [0x05,0x00,0x5d,0xd6,0xff,0xe1,0x0d,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cndmask_b16 v5, s105, s105, s3
+// W32: encoding: [0x05,0x00,0x5d,0xd6,0x69,0xd2,0x0c,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cndmask_b16 v5, vcc_hi, v2, s3
+// W32: encoding: [0x05,0x00,0x5d,0xd6,0x6b,0x04,0x0e,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cndmask_b16 v5, ttmp15, ttmp15, s3
+// W32: encoding: [0x05,0x00,0x5d,0xd6,0x7b,0xf6,0x0c,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cndmask_b16 v5, m0, v255, s3
+// W32: encoding: [0x05,0x00,0x5d,0xd6,0x7d,0xfe,0x0f,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cndmask_b16 v5, exec_lo, exec_lo, s3
+// W32: encoding: [0x05,0x00,0x5d,0xd6,0x7e,0xfc,0x0c,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cndmask_b16 v5, exec_hi, exec_hi, s3
+// W32: encoding: [0x05,0x00,0x5d,0xd6,0x7f,0xfe,0x0c,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cndmask_b16 v5, null, m0, s105
+// W32: encoding: [0x05,0x00,0x5d,0xd6,0x7c,0xfa,0xa4,0x01]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cndmask_b16 v5, -1, -|vcc_lo|, vcc_lo
+// W32: encoding: [0x05,0x02,0x5d,0xd6,0xc1,0xd4,0xa8,0x41]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cndmask_b16 v5, 0.5, -1, vcc_hi
+// W32: encoding: [0x05,0x00,0x5d,0xd6,0xf0,0x82,0xad,0x01]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cndmask_b16 v5, -|src_scc|, null, ttmp15
+// W32: encoding: [0x05,0x01,0x5d,0xd6,0xfd,0xf8,0xec,0x21]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cndmask_b16 v5, v1, src_scc, s[6:7]
+// W64: encoding: [0x05,0x00,0x5d,0xd6,0x01,0xfb,0x19,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cndmask_b16 v5, v255, 0.5, s[6:7]
+// W64: encoding: [0x05,0x00,0x5d,0xd6,0xff,0xe1,0x19,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cndmask_b16 v5, s105, s105, s[6:7]
+// W64: encoding: [0x05,0x00,0x5d,0xd6,0x69,0xd2,0x18,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cndmask_b16 v5, vcc_hi, v2, s[6:7]
+// W64: encoding: [0x05,0x00,0x5d,0xd6,0x6b,0x04,0x1a,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cndmask_b16 v5, ttmp15, ttmp15, s[6:7]
+// W64: encoding: [0x05,0x00,0x5d,0xd6,0x7b,0xf6,0x18,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cndmask_b16 v5, m0, v255, s[6:7]
+// W64: encoding: [0x05,0x00,0x5d,0xd6,0x7d,0xfe,0x1b,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cndmask_b16 v5, exec_lo, exec_lo, s[6:7]
+// W64: encoding: [0x05,0x00,0x5d,0xd6,0x7e,0xfc,0x18,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cndmask_b16 v5, exec_hi, exec_hi, s[6:7]
+// W64: encoding: [0x05,0x00,0x5d,0xd6,0x7f,0xfe,0x18,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cndmask_b16 v5, null, m0, s[6:7]
+// W64: encoding: [0x05,0x00,0x5d,0xd6,0x7c,0xfa,0x18,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cndmask_b16 v5, -1, -|vcc_lo|, s[104:105]
+// W64: encoding: [0x05,0x02,0x5d,0xd6,0xc1,0xd4,0xa0,0x41]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cndmask_b16 v5, 0.5, -1, vcc
+// W64: encoding: [0x05,0x00,0x5d,0xd6,0xf0,0x82,0xa9,0x01]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cndmask_b16 v5, -|src_scc|, null, ttmp[14:15]
+// W64: encoding: [0x05,0x01,0x5d,0xd6,0xfd,0xf8,0xe8,0x21]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cndmask_b16 v255, -|0xfe0b|, -|vcc_hi|, null
+// GFX11: encoding: [0xff,0x03,0x5d,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00]
+
+v_cubeid_f32 v5, v1, v2, s3
+// GFX11: encoding: [0x05,0x00,0x0c,0xd6,0x01,0x05,0x0e,0x00]
+
+v_cubeid_f32 v5, v255, s2, s105
+// GFX11: encoding: [0x05,0x00,0x0c,0xd6,0xff,0x05,0xa4,0x01]
+
+v_cubeid_f32 v5, s1, v255, exec_hi
+// GFX11: encoding: [0x05,0x00,0x0c,0xd6,0x01,0xfe,0xff,0x01]
+
+v_cubeid_f32 v5, s105, s105, exec_lo
+// GFX11: encoding: [0x05,0x00,0x0c,0xd6,0x69,0xd2,0xf8,0x01]
+
+v_cubeid_f32 v5, vcc_lo, ttmp15, v3
+// GFX11: encoding: [0x05,0x00,0x0c,0xd6,0x6a,0xf6,0x0c,0x04]
+
+v_cubeid_f32 v5, vcc_hi, 0xaf123456, v255
+// GFX11: encoding: [0x05,0x00,0x0c,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf]
+
+v_cubeid_f32 v5, -|ttmp15|, -|src_scc|, -|ttmp15|
+// GFX11: encoding: [0x05,0x07,0x0c,0xd6,0x7b,0xfa,0xed,0xe1]
+
+v_cubeid_f32 v5, m0, 0.5, m0
+// GFX11: encoding: [0x05,0x00,0x0c,0xd6,0x7d,0xe0,0xf5,0x01]
+
+v_cubeid_f32 v5, |exec_lo|, -1, vcc_hi
+// GFX11: encoding: [0x05,0x01,0x0c,0xd6,0x7e,0x82,0xad,0x01]
+
+v_cubeid_f32 v5, -|exec_hi|, null, -|vcc_lo|
+// GFX11: encoding: [0x05,0x05,0x0c,0xd6,0x7f,0xf8,0xa8,0xa1]
+
+v_cubeid_f32 v5, null, exec_lo, -|0xaf123456|
+// GFX11: encoding: [0x05,0x04,0x0c,0xd6,0x7c,0xfc,0xfc,0x83,0x56,0x34,0x12,0xaf]
+
+v_cubeid_f32 v5, -1, -|exec_hi|, -|src_scc|
+// GFX11: encoding: [0x05,0x06,0x0c,0xd6,0xc1,0xfe,0xf4,0xc3]
+
+v_cubeid_f32 v5, 0.5, -m0, 0.5 mul:2
+// GFX11: encoding: [0x05,0x00,0x0c,0xd6,0xf0,0xfa,0xc0,0x4b]
+
+v_cubeid_f32 v5, -src_scc, |vcc_lo|, -1 mul:4
+// GFX11: encoding: [0x05,0x02,0x0c,0xd6,0xfd,0xd4,0x04,0x33]
+
+v_cubeid_f32 v255, -|0xaf123456|, -|vcc_hi|, null clamp div:2
+// GFX11: encoding: [0xff,0x83,0x0c,0xd6,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf]
+
+v_cubema_f32 v5, v1, v2, s3
+// GFX11: encoding: [0x05,0x00,0x0f,0xd6,0x01,0x05,0x0e,0x00]
+
+v_cubema_f32 v5, v255, s2, s105
+// GFX11: encoding: [0x05,0x00,0x0f,0xd6,0xff,0x05,0xa4,0x01]
+
+v_cubema_f32 v5, s1, v255, exec_hi
+// GFX11: encoding: [0x05,0x00,0x0f,0xd6,0x01,0xfe,0xff,0x01]
+
+v_cubema_f32 v5, s105, s105, exec_lo
+// GFX11: encoding: [0x05,0x00,0x0f,0xd6,0x69,0xd2,0xf8,0x01]
+
+v_cubema_f32 v5, vcc_lo, ttmp15, v3
+// GFX11: encoding: [0x05,0x00,0x0f,0xd6,0x6a,0xf6,0x0c,0x04]
+
+v_cubema_f32 v5, vcc_hi, 0xaf123456, v255
+// GFX11: encoding: [0x05,0x00,0x0f,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf]
+
+v_cubema_f32 v5, -|ttmp15|, -|src_scc|, -|ttmp15|
+// GFX11: encoding: [0x05,0x07,0x0f,0xd6,0x7b,0xfa,0xed,0xe1]
+
+v_cubema_f32 v5, m0, 0.5, m0
+// GFX11: encoding: [0x05,0x00,0x0f,0xd6,0x7d,0xe0,0xf5,0x01]
+
+v_cubema_f32 v5, |exec_lo|, -1, vcc_hi
+// GFX11: encoding: [0x05,0x01,0x0f,0xd6,0x7e,0x82,0xad,0x01]
+
+v_cubema_f32 v5, -|exec_hi|, null, -|vcc_lo|
+// GFX11: encoding: [0x05,0x05,0x0f,0xd6,0x7f,0xf8,0xa8,0xa1]
+
+v_cubema_f32 v5, null, exec_lo, -|0xaf123456|
+// GFX11: encoding: [0x05,0x04,0x0f,0xd6,0x7c,0xfc,0xfc,0x83,0x56,0x34,0x12,0xaf]
+
+v_cubema_f32 v5, -1, -|exec_hi|, -|src_scc|
+// GFX11: encoding: [0x05,0x06,0x0f,0xd6,0xc1,0xfe,0xf4,0xc3]
+
+v_cubema_f32 v5, 0.5, -m0, 0.5 mul:2
+// GFX11: encoding: [0x05,0x00,0x0f,0xd6,0xf0,0xfa,0xc0,0x4b]
+
+v_cubema_f32 v5, -src_scc, |vcc_lo|, -1 mul:4
+// GFX11: encoding: [0x05,0x02,0x0f,0xd6,0xfd,0xd4,0x04,0x33]
+
+v_cubema_f32 v255, -|0xaf123456|, -|vcc_hi|, null clamp div:2
+// GFX11: encoding: [0xff,0x83,0x0f,0xd6,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf]
+
+v_cubesc_f32 v5, v1, v2, s3
+// GFX11: encoding: [0x05,0x00,0x0d,0xd6,0x01,0x05,0x0e,0x00]
+
+v_cubesc_f32 v5, v255, s2, s105
+// GFX11: encoding: [0x05,0x00,0x0d,0xd6,0xff,0x05,0xa4,0x01]
+
+v_cubesc_f32 v5, s1, v255, exec_hi
+// GFX11: encoding: [0x05,0x00,0x0d,0xd6,0x01,0xfe,0xff,0x01]
+
+v_cubesc_f32 v5, s105, s105, exec_lo
+// GFX11: encoding: [0x05,0x00,0x0d,0xd6,0x69,0xd2,0xf8,0x01]
+
+v_cubesc_f32 v5, vcc_lo, ttmp15, v3
+// GFX11: encoding: [0x05,0x00,0x0d,0xd6,0x6a,0xf6,0x0c,0x04]
+
+v_cubesc_f32 v5, vcc_hi, 0xaf123456, v255
+// GFX11: encoding: [0x05,0x00,0x0d,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf]
+
+v_cubesc_f32 v5, -|ttmp15|, -|src_scc|, -|ttmp15|
+// GFX11: encoding: [0x05,0x07,0x0d,0xd6,0x7b,0xfa,0xed,0xe1]
+
+v_cubesc_f32 v5, m0, 0.5, m0
+// GFX11: encoding: [0x05,0x00,0x0d,0xd6,0x7d,0xe0,0xf5,0x01]
+
+v_cubesc_f32 v5, |exec_lo|, -1, vcc_hi
+// GFX11: encoding: [0x05,0x01,0x0d,0xd6,0x7e,0x82,0xad,0x01]
+
+v_cubesc_f32 v5, -|exec_hi|, null, -|vcc_lo|
+// GFX11: encoding: [0x05,0x05,0x0d,0xd6,0x7f,0xf8,0xa8,0xa1]
+
+v_cubesc_f32 v5, null, exec_lo, -|0xaf123456|
+// GFX11: encoding: [0x05,0x04,0x0d,0xd6,0x7c,0xfc,0xfc,0x83,0x56,0x34,0x12,0xaf]
+
+v_cubesc_f32 v5, -1, -|exec_hi|, -|src_scc|
+// GFX11: encoding: [0x05,0x06,0x0d,0xd6,0xc1,0xfe,0xf4,0xc3]
+
+v_cubesc_f32 v5, 0.5, -m0, 0.5 mul:2
+// GFX11: encoding: [0x05,0x00,0x0d,0xd6,0xf0,0xfa,0xc0,0x4b]
+
+v_cubesc_f32 v5, -src_scc, |vcc_lo|, -1 mul:4
+// GFX11: encoding: [0x05,0x02,0x0d,0xd6,0xfd,0xd4,0x04,0x33]
+
+v_cubesc_f32 v255, -|0xaf123456|, -|vcc_hi|, null clamp div:2
+// GFX11: encoding: [0xff,0x83,0x0d,0xd6,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf]
+
+v_cubetc_f32 v5, v1, v2, s3
+// GFX11: encoding: [0x05,0x00,0x0e,0xd6,0x01,0x05,0x0e,0x00]
+
+v_cubetc_f32 v5, v255, s2, s105
+// GFX11: encoding: [0x05,0x00,0x0e,0xd6,0xff,0x05,0xa4,0x01]
+
+v_cubetc_f32 v5, s1, v255, exec_hi
+// GFX11: encoding: [0x05,0x00,0x0e,0xd6,0x01,0xfe,0xff,0x01]
+
+v_cubetc_f32 v5, s105, s105, exec_lo
+// GFX11: encoding: [0x05,0x00,0x0e,0xd6,0x69,0xd2,0xf8,0x01]
+
+v_cubetc_f32 v5, vcc_lo, ttmp15, v3
+// GFX11: encoding: [0x05,0x00,0x0e,0xd6,0x6a,0xf6,0x0c,0x04]
+
+v_cubetc_f32 v5, vcc_hi, 0xaf123456, v255
+// GFX11: encoding: [0x05,0x00,0x0e,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf]
+
+v_cubetc_f32 v5, -|ttmp15|, -|src_scc|, -|ttmp15|
+// GFX11: encoding: [0x05,0x07,0x0e,0xd6,0x7b,0xfa,0xed,0xe1]
+
+v_cubetc_f32 v5, m0, 0.5, m0
+// GFX11: encoding: [0x05,0x00,0x0e,0xd6,0x7d,0xe0,0xf5,0x01]
+
+v_cubetc_f32 v5, |exec_lo|, -1, vcc_hi
+// GFX11: encoding: [0x05,0x01,0x0e,0xd6,0x7e,0x82,0xad,0x01]
+
+v_cubetc_f32 v5, -|exec_hi|, null, -|vcc_lo|
+// GFX11: encoding: [0x05,0x05,0x0e,0xd6,0x7f,0xf8,0xa8,0xa1]
+
+v_cubetc_f32 v5, null, exec_lo, -|0xaf123456|
+// GFX11: encoding: [0x05,0x04,0x0e,0xd6,0x7c,0xfc,0xfc,0x83,0x56,0x34,0x12,0xaf]
+
+v_cubetc_f32 v5, -1, -|exec_hi|, -|src_scc|
+// GFX11: encoding: [0x05,0x06,0x0e,0xd6,0xc1,0xfe,0xf4,0xc3]
+
+v_cubetc_f32 v5, 0.5, -m0, 0.5 mul:2
+// GFX11: encoding: [0x05,0x00,0x0e,0xd6,0xf0,0xfa,0xc0,0x4b]
+
+v_cubetc_f32 v5, -src_scc, |vcc_lo|, -1 mul:4
+// GFX11: encoding: [0x05,0x02,0x0e,0xd6,0xfd,0xd4,0x04,0x33]
+
+v_cubetc_f32 v255, -|0xaf123456|, -|vcc_hi|, null clamp div:2
+// GFX11: encoding: [0xff,0x83,0x0e,0xd6,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf]
+
+v_cvt_pk_i16_f32 v5, v1, v2
+// GFX11: encoding: [0x05,0x00,0x06,0xd7,0x01,0x05,0x02,0x00]
+
+v_cvt_pk_i16_f32 v5, v255, v255
+// GFX11: encoding: [0x05,0x00,0x06,0xd7,0xff,0xff,0x03,0x00]
+
+v_cvt_pk_i16_f32 v5, s1, s2
+// GFX11: encoding: [0x05,0x00,0x06,0xd7,0x01,0x04,0x00,0x00]
+
+v_cvt_pk_i16_f32 v5, s105, s105
+// GFX11: encoding: [0x05,0x00,0x06,0xd7,0x69,0xd2,0x00,0x00]
+
+v_cvt_pk_i16_f32 v5, vcc_lo, ttmp15
+// GFX11: encoding: [0x05,0x00,0x06,0xd7,0x6a,0xf6,0x00,0x00]
+
+v_cvt_pk_i16_f32 v5, vcc_hi, 0xaf123456
+// GFX11: encoding: [0x05,0x00,0x06,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+
+v_cvt_pk_i16_f32 v5, ttmp15, src_scc
+// GFX11: encoding: [0x05,0x00,0x06,0xd7,0x7b,0xfa,0x01,0x00]
+
+v_cvt_pk_i16_f32 v5, m0, 0.5
+// GFX11: encoding: [0x05,0x00,0x06,0xd7,0x7d,0xe0,0x01,0x00]
+
+v_cvt_pk_i16_f32 v5, exec_lo, -1
+// GFX11: encoding: [0x05,0x00,0x06,0xd7,0x7e,0x82,0x01,0x00]
+
+v_cvt_pk_i16_f32 v5, |exec_hi|, null
+// GFX11: encoding: [0x05,0x01,0x06,0xd7,0x7f,0xf8,0x00,0x00]
+
+v_cvt_pk_i16_f32 v5, null, exec_lo
+// GFX11: encoding: [0x05,0x00,0x06,0xd7,0x7c,0xfc,0x00,0x00]
+
+v_cvt_pk_i16_f32 v5, -1, exec_hi
+// GFX11: encoding: [0x05,0x00,0x06,0xd7,0xc1,0xfe,0x00,0x00]
+
+v_cvt_pk_i16_f32 v5, 0.5, -m0
+// GFX11: encoding: [0x05,0x00,0x06,0xd7,0xf0,0xfa,0x00,0x40]
+
+v_cvt_pk_i16_f32 v5, -src_scc, |vcc_lo|
+// GFX11: encoding: [0x05,0x02,0x06,0xd7,0xfd,0xd4,0x00,0x20]
+
+v_cvt_pk_i16_f32 v255, -|0xaf123456|, -|vcc_hi|
+// GFX11: encoding: [0xff,0x03,0x06,0xd7,0xff,0xd6,0x00,0x60,0x56,0x34,0x12,0xaf]
+
+v_cvt_pk_i16_i32 v5, v1, v2
+// GFX11: encoding: [0x05,0x00,0x24,0xd7,0x01,0x05,0x02,0x00]
+
+v_cvt_pk_i16_i32 v5, v255, v255
+// GFX11: encoding: [0x05,0x00,0x24,0xd7,0xff,0xff,0x03,0x00]
+
+v_cvt_pk_i16_i32 v5, s1, s2
+// GFX11: encoding: [0x05,0x00,0x24,0xd7,0x01,0x04,0x00,0x00]
+
+v_cvt_pk_i16_i32 v5, s105, s105
+// GFX11: encoding: [0x05,0x00,0x24,0xd7,0x69,0xd2,0x00,0x00]
+
+v_cvt_pk_i16_i32 v5, vcc_lo, ttmp15
+// GFX11: encoding: [0x05,0x00,0x24,0xd7,0x6a,0xf6,0x00,0x00]
+
+v_cvt_pk_i16_i32 v5, vcc_hi, 0xaf123456
+// GFX11: encoding: [0x05,0x00,0x24,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+
+v_cvt_pk_i16_i32 v5, ttmp15, src_scc
+// GFX11: encoding: [0x05,0x00,0x24,0xd7,0x7b,0xfa,0x01,0x00]
+
+v_cvt_pk_i16_i32 v5, m0, 0.5
+// GFX11: encoding: [0x05,0x00,0x24,0xd7,0x7d,0xe0,0x01,0x00]
+
+v_cvt_pk_i16_i32 v5, exec_lo, -1
+// GFX11: encoding: [0x05,0x00,0x24,0xd7,0x7e,0x82,0x01,0x00]
+
+v_cvt_pk_i16_i32 v5, exec_hi, null
+// GFX11: encoding: [0x05,0x00,0x24,0xd7,0x7f,0xf8,0x00,0x00]
+
+v_cvt_pk_i16_i32 v5, null, exec_lo
+// GFX11: encoding: [0x05,0x00,0x24,0xd7,0x7c,0xfc,0x00,0x00]
+
+v_cvt_pk_i16_i32 v5, -1, exec_hi
+// GFX11: encoding: [0x05,0x00,0x24,0xd7,0xc1,0xfe,0x00,0x00]
+
+v_cvt_pk_i16_i32 v5, 0.5, m0
+// GFX11: encoding: [0x05,0x00,0x24,0xd7,0xf0,0xfa,0x00,0x00]
+
+v_cvt_pk_i16_i32 v5, src_scc, vcc_lo
+// GFX11: encoding: [0x05,0x00,0x24,0xd7,0xfd,0xd4,0x00,0x00]
+
+v_cvt_pk_i16_i32 v255, 0xaf123456, vcc_hi
+// GFX11: encoding: [0xff,0x00,0x24,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf]
+
+v_cvt_pk_norm_i16_f16 v5, v1, v2
+// GFX11: encoding: [0x05,0x00,0x12,0xd7,0x01,0x05,0x02,0x00]
+
+v_cvt_pk_norm_i16_f16 v5, v255, v255
+// GFX11: encoding: [0x05,0x00,0x12,0xd7,0xff,0xff,0x03,0x00]
+
+v_cvt_pk_norm_i16_f16 v5, s1, s2
+// GFX11: encoding: [0x05,0x00,0x12,0xd7,0x01,0x04,0x00,0x00]
+
+v_cvt_pk_norm_i16_f16 v5, s105, s105
+// GFX11: encoding: [0x05,0x00,0x12,0xd7,0x69,0xd2,0x00,0x00]
+
+v_cvt_pk_norm_i16_f16 v5, vcc_lo, ttmp15
+// GFX11: encoding: [0x05,0x00,0x12,0xd7,0x6a,0xf6,0x00,0x00]
+
+v_cvt_pk_norm_i16_f16 v5, vcc_hi, 0xfe0b
+// GFX11: encoding: [0x05,0x00,0x12,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+
+v_cvt_pk_norm_i16_f16 v5, ttmp15, src_scc
+// GFX11: encoding: [0x05,0x00,0x12,0xd7,0x7b,0xfa,0x01,0x00]
+
+v_cvt_pk_norm_i16_f16 v5, m0, 0.5
+// GFX11: encoding: [0x05,0x00,0x12,0xd7,0x7d,0xe0,0x01,0x00]
+
+v_cvt_pk_norm_i16_f16 v5, exec_lo, -1
+// GFX11: encoding: [0x05,0x00,0x12,0xd7,0x7e,0x82,0x01,0x00]
+
+v_cvt_pk_norm_i16_f16 v5, |exec_hi|, null
+// GFX11: encoding: [0x05,0x01,0x12,0xd7,0x7f,0xf8,0x00,0x00]
+
+v_cvt_pk_norm_i16_f16 v5, null, exec_lo
+// GFX11: encoding: [0x05,0x00,0x12,0xd7,0x7c,0xfc,0x00,0x00]
+
+v_cvt_pk_norm_i16_f16 v5, -1, exec_hi
+// GFX11: encoding: [0x05,0x00,0x12,0xd7,0xc1,0xfe,0x00,0x00]
+
+v_cvt_pk_norm_i16_f16 v5, 0.5, -m0 op_sel:[0,0,0]
+// GFX11: encoding: [0x05,0x00,0x12,0xd7,0xf0,0xfa,0x00,0x40]
+
+v_cvt_pk_norm_i16_f16 v5, -src_scc, |vcc_lo| op_sel:[1,0,0]
+// GFX11: encoding: [0x05,0x0a,0x12,0xd7,0xfd,0xd4,0x00,0x20]
+
+v_cvt_pk_norm_i16_f16 v255, -|0xfe0b|, -|vcc_hi| op_sel:[0,1,0]
+// GFX11: encoding: [0xff,0x13,0x12,0xd7,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00]
+
+v_cvt_pk_norm_u16_f16 v5, v1, v2
+// GFX11: encoding: [0x05,0x00,0x13,0xd7,0x01,0x05,0x02,0x00]
+
+v_cvt_pk_norm_u16_f16 v5, v255, v255
+// GFX11: encoding: [0x05,0x00,0x13,0xd7,0xff,0xff,0x03,0x00]
+
+v_cvt_pk_norm_u16_f16 v5, s1, s2
+// GFX11: encoding: [0x05,0x00,0x13,0xd7,0x01,0x04,0x00,0x00]
+
+v_cvt_pk_norm_u16_f16 v5, s105, s105
+// GFX11: encoding: [0x05,0x00,0x13,0xd7,0x69,0xd2,0x00,0x00]
+
+v_cvt_pk_norm_u16_f16 v5, vcc_lo, ttmp15
+// GFX11: encoding: [0x05,0x00,0x13,0xd7,0x6a,0xf6,0x00,0x00]
+
+v_cvt_pk_norm_u16_f16 v5, vcc_hi, 0xfe0b
+// GFX11: encoding: [0x05,0x00,0x13,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+
+v_cvt_pk_norm_u16_f16 v5, ttmp15, src_scc
+// GFX11: encoding: [0x05,0x00,0x13,0xd7,0x7b,0xfa,0x01,0x00]
+
+v_cvt_pk_norm_u16_f16 v5, m0, 0.5
+// GFX11: encoding: [0x05,0x00,0x13,0xd7,0x7d,0xe0,0x01,0x00]
+
+v_cvt_pk_norm_u16_f16 v5, exec_lo, -1
+// GFX11: encoding: [0x05,0x00,0x13,0xd7,0x7e,0x82,0x01,0x00]
+
+v_cvt_pk_norm_u16_f16 v5, |exec_hi|, null
+// GFX11: encoding: [0x05,0x01,0x13,0xd7,0x7f,0xf8,0x00,0x00]
+
+v_cvt_pk_norm_u16_f16 v5, null, exec_lo
+// GFX11: encoding: [0x05,0x00,0x13,0xd7,0x7c,0xfc,0x00,0x00]
+
+v_cvt_pk_norm_u16_f16 v5, -1, exec_hi
+// GFX11: encoding: [0x05,0x00,0x13,0xd7,0xc1,0xfe,0x00,0x00]
+
+v_cvt_pk_norm_u16_f16 v5, 0.5, -m0 op_sel:[0,0,0]
+// GFX11: encoding: [0x05,0x00,0x13,0xd7,0xf0,0xfa,0x00,0x40]
+
+v_cvt_pk_norm_u16_f16 v5, -src_scc, |vcc_lo| op_sel:[1,0,0]
+// GFX11: encoding: [0x05,0x0a,0x13,0xd7,0xfd,0xd4,0x00,0x20]
+
+v_cvt_pk_norm_u16_f16 v255, -|0xfe0b|, -|vcc_hi| op_sel:[0,1,0]
+// GFX11: encoding: [0xff,0x13,0x13,0xd7,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00]
+
+v_cvt_pk_u16_f32 v5, v1, v2
+// GFX11: encoding: [0x05,0x00,0x07,0xd7,0x01,0x05,0x02,0x00]
+
+v_cvt_pk_u16_f32 v5, v255, v255
+// GFX11: encoding: [0x05,0x00,0x07,0xd7,0xff,0xff,0x03,0x00]
+
+v_cvt_pk_u16_f32 v5, s1, s2
+// GFX11: encoding: [0x05,0x00,0x07,0xd7,0x01,0x04,0x00,0x00]
+
+v_cvt_pk_u16_f32 v5, s105, s105
+// GFX11: encoding: [0x05,0x00,0x07,0xd7,0x69,0xd2,0x00,0x00]
+
+v_cvt_pk_u16_f32 v5, vcc_lo, ttmp15
+// GFX11: encoding: [0x05,0x00,0x07,0xd7,0x6a,0xf6,0x00,0x00]
+
+v_cvt_pk_u16_f32 v5, vcc_hi, 0xaf123456
+// GFX11: encoding: [0x05,0x00,0x07,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+
+v_cvt_pk_u16_f32 v5, ttmp15, src_scc
+// GFX11: encoding: [0x05,0x00,0x07,0xd7,0x7b,0xfa,0x01,0x00]
+
+v_cvt_pk_u16_f32 v5, m0, 0.5
+// GFX11: encoding: [0x05,0x00,0x07,0xd7,0x7d,0xe0,0x01,0x00]
+
+v_cvt_pk_u16_f32 v5, exec_lo, -1
+// GFX11: encoding: [0x05,0x00,0x07,0xd7,0x7e,0x82,0x01,0x00]
+
+v_cvt_pk_u16_f32 v5, |exec_hi|, null
+// GFX11: encoding: [0x05,0x01,0x07,0xd7,0x7f,0xf8,0x00,0x00]
+
+v_cvt_pk_u16_f32 v5, null, exec_lo
+// GFX11: encoding: [0x05,0x00,0x07,0xd7,0x7c,0xfc,0x00,0x00]
+
+v_cvt_pk_u16_f32 v5, -1, exec_hi
+// GFX11: encoding: [0x05,0x00,0x07,0xd7,0xc1,0xfe,0x00,0x00]
+
+v_cvt_pk_u16_f32 v5, 0.5, -m0
+// GFX11: encoding: [0x05,0x00,0x07,0xd7,0xf0,0xfa,0x00,0x40]
+
+v_cvt_pk_u16_f32 v5, -src_scc, |vcc_lo|
+// GFX11: encoding: [0x05,0x02,0x07,0xd7,0xfd,0xd4,0x00,0x20]
+
+v_cvt_pk_u16_f32 v255, -|0xaf123456|, -|vcc_hi|
+// GFX11: encoding: [0xff,0x03,0x07,0xd7,0xff,0xd6,0x00,0x60,0x56,0x34,0x12,0xaf]
+
+v_cvt_pk_u16_u32 v5, v1, v2
+// GFX11: encoding: [0x05,0x00,0x23,0xd7,0x01,0x05,0x02,0x00]
+
+v_cvt_pk_u16_u32 v5, v255, v255
+// GFX11: encoding: [0x05,0x00,0x23,0xd7,0xff,0xff,0x03,0x00]
+
+v_cvt_pk_u16_u32 v5, s1, s2
+// GFX11: encoding: [0x05,0x00,0x23,0xd7,0x01,0x04,0x00,0x00]
+
+v_cvt_pk_u16_u32 v5, s105, s105
+// GFX11: encoding: [0x05,0x00,0x23,0xd7,0x69,0xd2,0x00,0x00]
+
+v_cvt_pk_u16_u32 v5, vcc_lo, ttmp15
+// GFX11: encoding: [0x05,0x00,0x23,0xd7,0x6a,0xf6,0x00,0x00]
+
+v_cvt_pk_u16_u32 v5, vcc_hi, 0xaf123456
+// GFX11: encoding: [0x05,0x00,0x23,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+
+v_cvt_pk_u16_u32 v5, ttmp15, src_scc
+// GFX11: encoding: [0x05,0x00,0x23,0xd7,0x7b,0xfa,0x01,0x00]
+
+v_cvt_pk_u16_u32 v5, m0, 0.5
+// GFX11: encoding: [0x05,0x00,0x23,0xd7,0x7d,0xe0,0x01,0x00]
+
+v_cvt_pk_u16_u32 v5, exec_lo, -1
+// GFX11: encoding: [0x05,0x00,0x23,0xd7,0x7e,0x82,0x01,0x00]
+
+v_cvt_pk_u16_u32 v5, exec_hi, null
+// GFX11: encoding: [0x05,0x00,0x23,0xd7,0x7f,0xf8,0x00,0x00]
+
+v_cvt_pk_u16_u32 v5, null, exec_lo
+// GFX11: encoding: [0x05,0x00,0x23,0xd7,0x7c,0xfc,0x00,0x00]
+
+v_cvt_pk_u16_u32 v5, -1, exec_hi
+// GFX11: encoding: [0x05,0x00,0x23,0xd7,0xc1,0xfe,0x00,0x00]
+
+v_cvt_pk_u16_u32 v5, 0.5, m0
+// GFX11: encoding: [0x05,0x00,0x23,0xd7,0xf0,0xfa,0x00,0x00]
+
+v_cvt_pk_u16_u32 v5, src_scc, vcc_lo
+// GFX11: encoding: [0x05,0x00,0x23,0xd7,0xfd,0xd4,0x00,0x00]
+
+v_cvt_pk_u16_u32 v255, 0xaf123456, vcc_hi
+// GFX11: encoding: [0xff,0x00,0x23,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf]
+
+v_cvt_pk_u8_f32 v5, v1, v2, s3
+// GFX11: encoding: [0x05,0x00,0x26,0xd6,0x01,0x05,0x0e,0x00]
+
+v_cvt_pk_u8_f32 v5, v255, s2, s105
+// GFX11: encoding: [0x05,0x00,0x26,0xd6,0xff,0x05,0xa4,0x01]
+
+v_cvt_pk_u8_f32 v5, s1, v255, exec_hi
+// GFX11: encoding: [0x05,0x00,0x26,0xd6,0x01,0xfe,0xff,0x01]
+
+v_cvt_pk_u8_f32 v5, s105, s105, exec_lo
+// GFX11: encoding: [0x05,0x00,0x26,0xd6,0x69,0xd2,0xf8,0x01]
+
+v_cvt_pk_u8_f32 v5, vcc_lo, ttmp15, v3
+// GFX11: encoding: [0x05,0x00,0x26,0xd6,0x6a,0xf6,0x0c,0x04]
+
+v_cvt_pk_u8_f32 v5, vcc_hi, 0xaf123456, v255
+// GFX11: encoding: [0x05,0x00,0x26,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf]
+
+v_cvt_pk_u8_f32 v5, ttmp15, src_scc, ttmp15
+// GFX11: encoding: [0x05,0x00,0x26,0xd6,0x7b,0xfa,0xed,0x01]
+
+v_cvt_pk_u8_f32 v5, m0, 0.5, m0
+// GFX11: encoding: [0x05,0x00,0x26,0xd6,0x7d,0xe0,0xf5,0x01]
+
+v_cvt_pk_u8_f32 v5, exec_lo, -1, vcc_hi
+// GFX11: encoding: [0x05,0x00,0x26,0xd6,0x7e,0x82,0xad,0x01]
+
+v_cvt_pk_u8_f32 v5, exec_hi, null, vcc_lo
+// GFX11: encoding: [0x05,0x00,0x26,0xd6,0x7f,0xf8,0xa8,0x01]
+
+v_cvt_pk_u8_f32 v5, null, exec_lo, 0xaf123456
+// GFX11: encoding: [0x05,0x00,0x26,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf]
+
+v_cvt_pk_u8_f32 v5, -1, exec_hi, src_scc
+// GFX11: encoding: [0x05,0x00,0x26,0xd6,0xc1,0xfe,0xf4,0x03]
+
+v_cvt_pk_u8_f32 v5, 0.5, m0, 0.5
+// GFX11: encoding: [0x05,0x00,0x26,0xd6,0xf0,0xfa,0xc0,0x03]
+
+v_cvt_pk_u8_f32 v5, src_scc, vcc_lo, -1
+// GFX11: encoding: [0x05,0x00,0x26,0xd6,0xfd,0xd4,0x04,0x03]
+
+v_cvt_pk_u8_f32 v255, -|0xaf123456|, vcc_hi, null
+// GFX11: encoding: [0xff,0x01,0x26,0xd6,0xff,0xd6,0xf0,0x21,0x56,0x34,0x12,0xaf]
+
+v_cvt_pk_norm_i16_f16 v5, v1, v2
+// GFX11: encoding: [0x05,0x00,0x12,0xd7,0x01,0x05,0x02,0x00]
+
+v_cvt_pk_norm_i16_f16 v5, v255, v255
+// GFX11: encoding: [0x05,0x00,0x12,0xd7,0xff,0xff,0x03,0x00]
+
+v_cvt_pk_norm_i16_f16 v5, s1, s2
+// GFX11: encoding: [0x05,0x00,0x12,0xd7,0x01,0x04,0x00,0x00]
+
+v_cvt_pk_norm_i16_f16 v5, s105, s105
+// GFX11: encoding: [0x05,0x00,0x12,0xd7,0x69,0xd2,0x00,0x00]
+
+v_cvt_pk_norm_i16_f16 v5, vcc_lo, ttmp15
+// GFX11: encoding: [0x05,0x00,0x12,0xd7,0x6a,0xf6,0x00,0x00]
+
+v_cvt_pk_norm_i16_f16 v5, vcc_hi, 0xfe0b
+// GFX11: encoding: [0x05,0x00,0x12,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+
+v_cvt_pk_norm_i16_f16 v5, ttmp15, src_scc
+// GFX11: encoding: [0x05,0x00,0x12,0xd7,0x7b,0xfa,0x01,0x00]
+
+v_cvt_pk_norm_i16_f16 v5, m0, 0.5
+// GFX11: encoding: [0x05,0x00,0x12,0xd7,0x7d,0xe0,0x01,0x00]
+
+v_cvt_pk_norm_i16_f16 v5, exec_lo, -1
+// GFX11: encoding: [0x05,0x00,0x12,0xd7,0x7e,0x82,0x01,0x00]
+
+v_cvt_pk_norm_i16_f16 v5, |exec_hi|, null
+// GFX11: encoding: [0x05,0x01,0x12,0xd7,0x7f,0xf8,0x00,0x00]
+
+v_cvt_pk_norm_i16_f16 v5, null, exec_lo
+// GFX11: encoding: [0x05,0x00,0x12,0xd7,0x7c,0xfc,0x00,0x00]
+
+v_cvt_pk_norm_i16_f16 v5, -1, exec_hi
+// GFX11: encoding: [0x05,0x00,0x12,0xd7,0xc1,0xfe,0x00,0x00]
+
+v_cvt_pk_norm_i16_f16 v5, 0.5, -m0 op_sel:[0,0,0]
+// GFX11: encoding: [0x05,0x00,0x12,0xd7,0xf0,0xfa,0x00,0x40]
+
+v_cvt_pk_norm_i16_f16 v5, -src_scc, |vcc_lo| op_sel:[1,0,0]
+// GFX11: encoding: [0x05,0x0a,0x12,0xd7,0xfd,0xd4,0x00,0x20]
+
+v_cvt_pk_norm_i16_f16 v255, -|0xfe0b|, -|vcc_hi| op_sel:[0,1,0]
+// GFX11: encoding: [0xff,0x13,0x12,0xd7,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00]
+
+v_cvt_pk_norm_i16_f32 v5, v1, v2
+// GFX11: encoding: [0x05,0x00,0x21,0xd7,0x01,0x05,0x02,0x00]
+
+v_cvt_pk_norm_i16_f32 v5, v255, v255
+// GFX11: encoding: [0x05,0x00,0x21,0xd7,0xff,0xff,0x03,0x00]
+
+v_cvt_pk_norm_i16_f32 v5, s1, s2
+// GFX11: encoding: [0x05,0x00,0x21,0xd7,0x01,0x04,0x00,0x00]
+
+v_cvt_pk_norm_i16_f32 v5, s105, s105
+// GFX11: encoding: [0x05,0x00,0x21,0xd7,0x69,0xd2,0x00,0x00]
+
+v_cvt_pk_norm_i16_f32 v5, vcc_lo, ttmp15
+// GFX11: encoding: [0x05,0x00,0x21,0xd7,0x6a,0xf6,0x00,0x00]
+
+v_cvt_pk_norm_i16_f32 v5, vcc_hi, 0xaf123456
+// GFX11: encoding: [0x05,0x00,0x21,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+
+v_cvt_pk_norm_i16_f32 v5, ttmp15, src_scc
+// GFX11: encoding: [0x05,0x00,0x21,0xd7,0x7b,0xfa,0x01,0x00]
+
+v_cvt_pk_norm_i16_f32 v5, m0, 0.5
+// GFX11: encoding: [0x05,0x00,0x21,0xd7,0x7d,0xe0,0x01,0x00]
+
+v_cvt_pk_norm_i16_f32 v5, exec_lo, -1
+// GFX11: encoding: [0x05,0x00,0x21,0xd7,0x7e,0x82,0x01,0x00]
+
+v_cvt_pk_norm_i16_f32 v5, |exec_hi|, null
+// GFX11: encoding: [0x05,0x01,0x21,0xd7,0x7f,0xf8,0x00,0x00]
+
+v_cvt_pk_norm_i16_f32 v5, null, exec_lo
+// GFX11: encoding: [0x05,0x00,0x21,0xd7,0x7c,0xfc,0x00,0x00]
+
+v_cvt_pk_norm_i16_f32 v5, -1, exec_hi
+// GFX11: encoding: [0x05,0x00,0x21,0xd7,0xc1,0xfe,0x00,0x00]
+
+v_cvt_pk_norm_i16_f32 v5, 0.5, -m0
+// GFX11: encoding: [0x05,0x00,0x21,0xd7,0xf0,0xfa,0x00,0x40]
+
+v_cvt_pk_norm_i16_f32 v5, -src_scc, |vcc_lo|
+// GFX11: encoding: [0x05,0x02,0x21,0xd7,0xfd,0xd4,0x00,0x20]
+
+v_cvt_pk_norm_i16_f32 v255, -|0xaf123456|, -|vcc_hi|
+// GFX11: encoding: [0xff,0x03,0x21,0xd7,0xff,0xd6,0x00,0x60,0x56,0x34,0x12,0xaf]
+
+v_cvt_pk_norm_u16_f16 v5, v1, v2
+// GFX11: encoding: [0x05,0x00,0x13,0xd7,0x01,0x05,0x02,0x00]
+
+v_cvt_pk_norm_u16_f16 v5, v255, v255
+// GFX11: encoding: [0x05,0x00,0x13,0xd7,0xff,0xff,0x03,0x00]
+
+v_cvt_pk_norm_u16_f16 v5, s1, s2
+// GFX11: encoding: [0x05,0x00,0x13,0xd7,0x01,0x04,0x00,0x00]
+
+v_cvt_pk_norm_u16_f16 v5, s105, s105
+// GFX11: encoding: [0x05,0x00,0x13,0xd7,0x69,0xd2,0x00,0x00]
+
+v_cvt_pk_norm_u16_f16 v5, vcc_lo, ttmp15
+// GFX11: encoding: [0x05,0x00,0x13,0xd7,0x6a,0xf6,0x00,0x00]
+
+v_cvt_pk_norm_u16_f16 v5, vcc_hi, 0xfe0b
+// GFX11: encoding: [0x05,0x00,0x13,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+
+v_cvt_pk_norm_u16_f16 v5, ttmp15, src_scc
+// GFX11: encoding: [0x05,0x00,0x13,0xd7,0x7b,0xfa,0x01,0x00]
+
+v_cvt_pk_norm_u16_f16 v5, m0, 0.5
+// GFX11: encoding: [0x05,0x00,0x13,0xd7,0x7d,0xe0,0x01,0x00]
+
+v_cvt_pk_norm_u16_f16 v5, exec_lo, -1
+// GFX11: encoding: [0x05,0x00,0x13,0xd7,0x7e,0x82,0x01,0x00]
+
+v_cvt_pk_norm_u16_f16 v5, |exec_hi|, null
+// GFX11: encoding: [0x05,0x01,0x13,0xd7,0x7f,0xf8,0x00,0x00]
+
+v_cvt_pk_norm_u16_f16 v5, null, exec_lo
+// GFX11: encoding: [0x05,0x00,0x13,0xd7,0x7c,0xfc,0x00,0x00]
+
+v_cvt_pk_norm_u16_f16 v5, -1, exec_hi
+// GFX11: encoding: [0x05,0x00,0x13,0xd7,0xc1,0xfe,0x00,0x00]
+
+v_cvt_pk_norm_u16_f16 v5, 0.5, -m0 op_sel:[0,0,0]
+// GFX11: encoding: [0x05,0x00,0x13,0xd7,0xf0,0xfa,0x00,0x40]
+
+v_cvt_pk_norm_u16_f16 v5, -src_scc, |vcc_lo| op_sel:[1,0,0]
+// GFX11: encoding: [0x05,0x0a,0x13,0xd7,0xfd,0xd4,0x00,0x20]
+
+v_cvt_pk_norm_u16_f16 v255, -|0xfe0b|, -|vcc_hi| op_sel:[0,1,0]
+// GFX11: encoding: [0xff,0x13,0x13,0xd7,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00]
+
+v_cvt_pk_norm_u16_f32 v5, v1, v2
+// GFX11: encoding: [0x05,0x00,0x22,0xd7,0x01,0x05,0x02,0x00]
+
+v_cvt_pk_norm_u16_f32 v5, v255, v255
+// GFX11: encoding: [0x05,0x00,0x22,0xd7,0xff,0xff,0x03,0x00]
+
+v_cvt_pk_norm_u16_f32 v5, s1, s2
+// GFX11: encoding: [0x05,0x00,0x22,0xd7,0x01,0x04,0x00,0x00]
+
+v_cvt_pk_norm_u16_f32 v5, s105, s105
+// GFX11: encoding: [0x05,0x00,0x22,0xd7,0x69,0xd2,0x00,0x00]
+
+v_cvt_pk_norm_u16_f32 v5, vcc_lo, ttmp15
+// GFX11: encoding: [0x05,0x00,0x22,0xd7,0x6a,0xf6,0x00,0x00]
+
+v_cvt_pk_norm_u16_f32 v5, vcc_hi, 0xaf123456
+// GFX11: encoding: [0x05,0x00,0x22,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+
+v_cvt_pk_norm_u16_f32 v5, ttmp15, src_scc
+// GFX11: encoding: [0x05,0x00,0x22,0xd7,0x7b,0xfa,0x01,0x00]
+
+v_cvt_pk_norm_u16_f32 v5, m0, 0.5
+// GFX11: encoding: [0x05,0x00,0x22,0xd7,0x7d,0xe0,0x01,0x00]
+
+v_cvt_pk_norm_u16_f32 v5, exec_lo, -1
+// GFX11: encoding: [0x05,0x00,0x22,0xd7,0x7e,0x82,0x01,0x00]
+
+v_cvt_pk_norm_u16_f32 v5, |exec_hi|, null
+// GFX11: encoding: [0x05,0x01,0x22,0xd7,0x7f,0xf8,0x00,0x00]
+
+v_cvt_pk_norm_u16_f32 v5, null, exec_lo
+// GFX11: encoding: [0x05,0x00,0x22,0xd7,0x7c,0xfc,0x00,0x00]
+
+v_cvt_pk_norm_u16_f32 v5, -1, exec_hi
+// GFX11: encoding: [0x05,0x00,0x22,0xd7,0xc1,0xfe,0x00,0x00]
+
+v_cvt_pk_norm_u16_f32 v5, 0.5, -m0
+// GFX11: encoding: [0x05,0x00,0x22,0xd7,0xf0,0xfa,0x00,0x40]
+
+v_cvt_pk_norm_u16_f32 v5, -src_scc, |vcc_lo|
+// GFX11: encoding: [0x05,0x02,0x22,0xd7,0xfd,0xd4,0x00,0x20]
+
+v_cvt_pk_norm_u16_f32 v255, -|0xaf123456|, -|vcc_hi|
+// GFX11: encoding: [0xff,0x03,0x22,0xd7,0xff,0xd6,0x00,0x60,0x56,0x34,0x12,0xaf]
+
+v_div_fixup_f16 v5, v1, v2, s3
+// GFX11: encoding: [0x05,0x00,0x54,0xd6,0x01,0x05,0x0e,0x00]
+
+v_div_fixup_f16 v5, v255, s2, s105
+// GFX11: encoding: [0x05,0x00,0x54,0xd6,0xff,0x05,0xa4,0x01]
+
+v_div_fixup_f16 v5, s1, v255, exec_hi
+// GFX11: encoding: [0x05,0x00,0x54,0xd6,0x01,0xfe,0xff,0x01]
+
+v_div_fixup_f16 v5, s105, s105, exec_lo
+// GFX11: encoding: [0x05,0x00,0x54,0xd6,0x69,0xd2,0xf8,0x01]
+
+v_div_fixup_f16 v5, vcc_lo, ttmp15, v3
+// GFX11: encoding: [0x05,0x00,0x54,0xd6,0x6a,0xf6,0x0c,0x04]
+
+v_div_fixup_f16 v5, vcc_hi, 0xfe0b, v255
+// GFX11: encoding: [0x05,0x00,0x54,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00]
+
+v_div_fixup_f16 v5, -|ttmp15|, -|src_scc|, -|ttmp15|
+// GFX11: encoding: [0x05,0x07,0x54,0xd6,0x7b,0xfa,0xed,0xe1]
+
+v_div_fixup_f16 v5, m0, 0.5, m0
+// GFX11: encoding: [0x05,0x00,0x54,0xd6,0x7d,0xe0,0xf5,0x01]
+
+v_div_fixup_f16 v5, |exec_lo|, -1, vcc_hi
+// GFX11: encoding: [0x05,0x01,0x54,0xd6,0x7e,0x82,0xad,0x01]
+
+v_div_fixup_f16 v5, -|exec_hi|, null, -|vcc_lo| op_sel:[1,1,1,1]
+// GFX11: encoding: [0x05,0x7d,0x54,0xd6,0x7f,0xf8,0xa8,0xa1]
+
+v_div_fixup_f16 v5, null, exec_lo, -|0xfe0b| op_sel:[0,0,0,0]
+// GFX11: encoding: [0x05,0x04,0x54,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00]
+
+v_div_fixup_f16 v5, -1, -|exec_hi|, -|src_scc| op_sel:[1,0,0,0]
+// GFX11: encoding: [0x05,0x0e,0x54,0xd6,0xc1,0xfe,0xf4,0xc3]
+
+v_div_fixup_f16 v5, 0.5, -m0, 0.5 op_sel:[0,1,0,0]
+// GFX11: encoding: [0x05,0x10,0x54,0xd6,0xf0,0xfa,0xc0,0x43]
+
+v_div_fixup_f16 v5, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0]
+// GFX11: encoding: [0x05,0x22,0x54,0xd6,0xfd,0xd4,0x04,0x23]
+
+v_div_fixup_f16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp
+// GFX11: encoding: [0xff,0xc3,0x54,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00]
+
+v_div_fixup_f16 v5, 0.5, -m0, 0.5 op_sel:[0,1,0,0] mul:2
+// GFX11: encoding: [0x05,0x10,0x54,0xd6,0xf0,0xfa,0xc0,0x4b]
+
+v_div_fixup_f32 v5, v1, v2, s3
+// GFX11: encoding: [0x05,0x00,0x27,0xd6,0x01,0x05,0x0e,0x00]
+
+v_div_fixup_f32 v5, v255, s2, s105
+// GFX11: encoding: [0x05,0x00,0x27,0xd6,0xff,0x05,0xa4,0x01]
+
+v_div_fixup_f32 v5, s1, v255, exec_hi
+// GFX11: encoding: [0x05,0x00,0x27,0xd6,0x01,0xfe,0xff,0x01]
+
+v_div_fixup_f32 v5, s105, s105, exec_lo
+// GFX11: encoding: [0x05,0x00,0x27,0xd6,0x69,0xd2,0xf8,0x01]
+
+v_div_fixup_f32 v5, vcc_lo, ttmp15, v3
+// GFX11: encoding: [0x05,0x00,0x27,0xd6,0x6a,0xf6,0x0c,0x04]
+
+v_div_fixup_f32 v5, vcc_hi, 0xaf123456, v255
+// GFX11: encoding: [0x05,0x00,0x27,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf]
+
+v_div_fixup_f32 v5, -|ttmp15|, -|src_scc|, -|ttmp15|
+// GFX11: encoding: [0x05,0x07,0x27,0xd6,0x7b,0xfa,0xed,0xe1]
+
+v_div_fixup_f32 v5, m0, 0.5, m0
+// GFX11: encoding: [0x05,0x00,0x27,0xd6,0x7d,0xe0,0xf5,0x01]
+
+v_div_fixup_f32 v5, |exec_lo|, -1, vcc_hi
+// GFX11: encoding: [0x05,0x01,0x27,0xd6,0x7e,0x82,0xad,0x01]
+
+v_div_fixup_f32 v5, -|exec_hi|, null, -|vcc_lo|
+// GFX11: encoding: [0x05,0x05,0x27,0xd6,0x7f,0xf8,0xa8,0xa1]
+
+v_div_fixup_f32 v5, null, exec_lo, -|0xaf123456|
+// GFX11: encoding: [0x05,0x04,0x27,0xd6,0x7c,0xfc,0xfc,0x83,0x56,0x34,0x12,0xaf]
+
+v_div_fixup_f32 v5, -1, -|exec_hi|, -|src_scc|
+// GFX11: encoding: [0x05,0x06,0x27,0xd6,0xc1,0xfe,0xf4,0xc3]
+
+v_div_fixup_f32 v5, 0.5, -m0, 0.5 mul:2
+// GFX11: encoding: [0x05,0x00,0x27,0xd6,0xf0,0xfa,0xc0,0x4b]
+
+v_div_fixup_f32 v5, -src_scc, |vcc_lo|, -1 mul:4
+// GFX11: encoding: [0x05,0x02,0x27,0xd6,0xfd,0xd4,0x04,0x33]
+
+v_div_fixup_f32 v255, -|0xaf123456|, -|vcc_hi|, null clamp div:2
+// GFX11: encoding: [0xff,0x83,0x27,0xd6,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf]
+
+v_div_fixup_f64 v[5:6], v[1:2], v[2:3], v[3:4]
+// GFX11: encoding: [0x05,0x00,0x28,0xd6,0x01,0x05,0x0e,0x04]
+
+v_div_fixup_f64 v[5:6], v[254:255], v[254:255], s[6:7]
+// GFX11: encoding: [0x05,0x00,0x28,0xd6,0xfe,0xfd,0x1b,0x00]
+
+v_div_fixup_f64 v[5:6], s[2:3], s[4:5], v[254:255]
+// GFX11: encoding: [0x05,0x00,0x28,0xd6,0x02,0x08,0xf8,0x07]
+
+v_div_fixup_f64 v[5:6], -|s[104:105]|, s[104:105], -|s[104:105]|
+// GFX11: encoding: [0x05,0x05,0x28,0xd6,0x68,0xd0,0xa0,0xa1]
+
+v_div_fixup_f64 v[5:6], vcc, -|ttmp[14:15]|, -|ttmp[14:15]|
+// GFX11: encoding: [0x05,0x06,0x28,0xd6,0x6a,0xf4,0xe8,0xc1]
+
+v_div_fixup_f64 v[5:6], -|ttmp[14:15]|, 0xaf123456, null
+// GFX11: encoding: [0x05,0x01,0x28,0xd6,0x7a,0xfe,0xf1,0x21,0x56,0x34,0x12,0xaf]
+
+v_div_fixup_f64 v[5:6], -|exec|, -|src_scc|, -|exec|
+// GFX11: encoding: [0x05,0x07,0x28,0xd6,0x7e,0xfa,0xf9,0xe1]
+
+v_div_fixup_f64 v[5:6], null, 0.5, vcc
+// GFX11: encoding: [0x05,0x00,0x28,0xd6,0x7c,0xe0,0xa9,0x01]
+
+v_div_fixup_f64 v[5:6], -1, -1, 0xaf123456
+// GFX11: encoding: [0x05,0x00,0x28,0xd6,0xc1,0x82,0xfd,0x03,0x56,0x34,0x12,0xaf]
+
+v_div_fixup_f64 v[5:6], 0.5, null, -|src_scc| mul:2
+// GFX11: encoding: [0x05,0x04,0x28,0xd6,0xf0,0xf8,0xf4,0x8b]
+
+v_div_fixup_f64 v[5:6], -|src_scc|, -|exec|, 0.5 mul:4
+// GFX11: encoding: [0x05,0x03,0x28,0xd6,0xfd,0xfc,0xc0,0x73]
+
+v_div_fixup_f64 v[254:255], 0xaf123456, -|vcc|, -1 clamp div:2
+// GFX11: encoding: [0xfe,0x82,0x28,0xd6,0xff,0xd4,0x04,0x5b,0x56,0x34,0x12,0xaf]
+
+v_div_fmas_f32 v5, vcc_lo, v2, vcc_lo
+// W32: encoding: [0x05,0x00,0x37,0xd6,0x6a,0x04,0xaa,0x01]
+
+v_div_fmas_f32 v5, ttmp15, ttmp15, ttmp15
+// W32: encoding: [0x05,0x00,0x37,0xd6,0x7b,0xf6,0xec,0x01]
+
+v_div_fmas_f32 v5, -|m0|, -|v255|, v3
+// W32: encoding: [0x05,0x03,0x37,0xd6,0x7d,0xfe,0x0f,0x64]
+
+v_div_fmas_f32 v5, -|exec_lo|, -|exec_lo|, -|exec_lo|
+// W32: encoding: [0x05,0x07,0x37,0xd6,0x7e,0xfc,0xf8,0xe1]
+
+v_div_fmas_f32 v5, -|exec_hi|, 0.5, -|v255|
+// W32: encoding: [0x05,0x05,0x37,0xd6,0x7f,0xe0,0xfd,0xa7]
+
+v_div_fmas_f32 v5, null, exec_hi, -|exec_hi|
+// W32: encoding: [0x05,0x04,0x37,0xd6,0x7c,0xfe,0xfc,0x81]
+
+v_div_fmas_f32 v5, -1, -|m0|, -|m0|
+// W32: encoding: [0x05,0x06,0x37,0xd6,0xc1,0xfa,0xf4,0xc1]
+
+v_div_fmas_f32 v5, 0.5, -|vcc_lo|, 0.5 mul:2
+// W32: encoding: [0x05,0x02,0x37,0xd6,0xf0,0xd4,0xc0,0x4b]
+
+v_div_fmas_f32 v5, vcc_lo, v2, v3
+// W64: encoding: [0x05,0x00,0x37,0xd6,0x6a,0x04,0x0e,0x04]
+
+v_div_fmas_f32 v5, vcc_hi, v255, vcc_hi
+// W64: encoding: [0x05,0x00,0x37,0xd6,0x6b,0xfe,0xaf,0x01]
+
+v_div_fmas_f32 v5, -|ttmp15|, -|ttmp15|, ttmp15
+// W64: encoding: [0x05,0x03,0x37,0xd6,0x7b,0xf6,0xec,0x61]
+
+v_div_fmas_f32 v5, m0, 0.5, v255
+// W64: encoding: [0x05,0x00,0x37,0xd6,0x7d,0xe0,0xfd,0x07]
+
+v_div_fmas_f32 v5, -|exec_lo|, exec_lo, -|exec_lo|
+// W64: encoding: [0x05,0x05,0x37,0xd6,0x7e,0xfc,0xf8,0xa1]
+
+v_div_fmas_f32 v5, -|exec_hi|, -|exec_hi|, -|exec_hi|
+// W64: encoding: [0x05,0x07,0x37,0xd6,0x7f,0xfe,0xfc,0xe1]
+
+v_div_fmas_f32 v5, null, m0, -|m0|
+// W64: encoding: [0x05,0x04,0x37,0xd6,0x7c,0xfa,0xf4,0x81]
+
+v_div_fmas_f32 v5, -1, -|vcc_lo|, -|vcc_lo|
+// W64: encoding: [0x05,0x06,0x37,0xd6,0xc1,0xd4,0xa8,0xc1]
+
+v_div_fmas_f32 v5, 0.5, -|vcc_hi|, 0.5 mul:2
+// W64: encoding: [0x05,0x02,0x37,0xd6,0xf0,0xd6,0xc0,0x4b]
+
+v_div_fmas_f32 v5, v1, 0xaf123456, 0xaf123456
+// GFX11: encoding: [0x05,0x00,0x37,0xd6,0x01,0xff,0xfd,0x03,0x56,0x34,0x12,0xaf]
+
+v_div_fmas_f32 v5, v255, src_scc, src_scc
+// GFX11: encoding: [0x05,0x00,0x37,0xd6,0xff,0xfb,0xf5,0x03]
+
+v_div_fmas_f32 v5, s105, s105, s105
+// GFX11: encoding: [0x05,0x00,0x37,0xd6,0x69,0xd2,0xa4,0x01]
+
+v_div_fmas_f32 v5, src_scc, -1, -1 mul:4
+// GFX11: encoding: [0x05,0x00,0x37,0xd6,0xfd,0x82,0x05,0x13]
+
+v_div_fmas_f32 v255, -|0xaf123456|, null, null clamp div:2
+// GFX11: encoding: [0xff,0x81,0x37,0xd6,0xff,0xf8,0xf0,0x39,0x56,0x34,0x12,0xaf]
+
+v_div_fmas_f64 v[5:6], v[1:2], 0xaf123456, 0xaf123456
+// GFX11: encoding: [0x05,0x00,0x38,0xd6,0x01,0xff,0xfd,0x03,0x56,0x34,0x12,0xaf]
+
+v_div_fmas_f64 v[5:6], v[254:255], src_scc, v[3:4]
+// GFX11: encoding: [0x05,0x00,0x38,0xd6,0xfe,0xfb,0x0d,0x04]
+
+v_div_fmas_f64 v[5:6], s[104:105], |s[104:105]|, s[104:105]
+// GFX11: encoding: [0x05,0x02,0x38,0xd6,0x68,0xd0,0xa0,0x01]
+
+v_div_fmas_f64 v[5:6], -|vcc|, v[2:3], -|v[254:255]|
+// GFX11: encoding: [0x05,0x05,0x38,0xd6,0x6a,0x04,0xfa,0xa7]
+
+v_div_fmas_f64 v[5:6], -|ttmp[14:15]|, -|ttmp[14:15]|, -|ttmp[14:15]|
+// GFX11: encoding: [0x05,0x07,0x38,0xd6,0x7a,0xf4,0xe8,0xe1]
+
+v_div_fmas_f64 v[5:6], -|exec|, -|v[254:255]|, null
+// GFX11: encoding: [0x05,0x03,0x38,0xd6,0x7e,0xfc,0xf3,0x61]
+
+v_div_fmas_f64 v[5:6], null, 0.5, -src_scc
+// GFX11: encoding: [0x05,0x00,0x38,0xd6,0x7c,0xe0,0xf5,0x83]
+
+v_div_fmas_f64 v[5:6], -1, -exec, |exec|
+// GFX11: encoding: [0x05,0x04,0x38,0xd6,0xc1,0xfc,0xf8,0x41]
+
+v_div_fmas_f64 v[5:6], 0.5, -|vcc|, -|vcc| mul:2
+// GFX11: encoding: [0x05,0x06,0x38,0xd6,0xf0,0xd4,0xa8,0xc9]
+
+v_div_fmas_f64 v[5:6], -|src_scc|, -1, 0.5 mul:4
+// GFX11: encoding: [0x05,0x01,0x38,0xd6,0xfd,0x82,0xc1,0x33]
+
+v_div_fmas_f64 v[254:255], 0xaf123456, null, -1 clamp div:2
+// GFX11: encoding: [0xfe,0x80,0x38,0xd6,0xff,0xf8,0x04,0x1b,0x56,0x34,0x12,0xaf]
+
+v_div_scale_f32 v5, vcc_lo, v1, v2, s3
+// W32: encoding: [0x05,0x6a,0xfc,0xd6,0x01,0x05,0x0e,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_div_scale_f32 v5, vcc_lo, v255, s2, s105
+// W32: encoding: [0x05,0x6a,0xfc,0xd6,0xff,0x05,0xa4,0x01]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_div_scale_f32 v5, vcc_lo, s1, v255, exec_hi
+// W32: encoding: [0x05,0x6a,0xfc,0xd6,0x01,0xfe,0xff,0x01]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_div_scale_f32 v5, vcc_lo, s105, s105, exec_lo
+// W32: encoding: [0x05,0x6a,0xfc,0xd6,0x69,0xd2,0xf8,0x01]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_div_scale_f32 v5, vcc_lo, vcc_lo, ttmp15, v3
+// W32: encoding: [0x05,0x6a,0xfc,0xd6,0x6a,0xf6,0x0c,0x04]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_div_scale_f32 v5, vcc_lo, vcc_hi, 0xaf123456, v255
+// W32: encoding: [0x05,0x6a,0xfc,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_div_scale_f32 v5, vcc_lo, -ttmp15, -src_scc, -ttmp15
+// W32: encoding: [0x05,0x6a,0xfc,0xd6,0x7b,0xfa,0xed,0xe1]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_div_scale_f32 v5, vcc_lo, m0, 0.5, m0
+// W32: encoding: [0x05,0x6a,0xfc,0xd6,0x7d,0xe0,0xf5,0x01]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_div_scale_f32 v5, vcc_lo, exec_lo, -1, vcc_hi
+// W32: encoding: [0x05,0x6a,0xfc,0xd6,0x7e,0x82,0xad,0x01]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_div_scale_f32 v5, vcc_lo, -exec_hi, null, -vcc_lo
+// W32: encoding: [0x05,0x6a,0xfc,0xd6,0x7f,0xf8,0xa8,0xa1]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_div_scale_f32 v5, vcc_lo, null, exec_lo, neg(0xaf123456)
+// W32: encoding: [0x05,0x6a,0xfc,0xd6,0x7c,0xfc,0xfc,0x83,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_div_scale_f32 v5, vcc_lo, -1, -exec_hi, -src_scc
+// W32: encoding: [0x05,0x6a,0xfc,0xd6,0xc1,0xfe,0xf4,0xc3]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_div_scale_f32 v5, vcc_lo, 0.5, -m0, 0.5 mul:2
+// W32: encoding: [0x05,0x6a,0xfc,0xd6,0xf0,0xfa,0xc0,0x4b]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_div_scale_f32 v5, vcc_lo, -src_scc, vcc_lo, -1 mul:4
+// W32: encoding: [0x05,0x6a,0xfc,0xd6,0xfd,0xd4,0x04,0x33]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_div_scale_f32 v255, vcc_lo, neg(0xaf123456), -vcc_hi, null clamp div:2
+// W32: encoding: [0xff,0xea,0xfc,0xd6,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_div_scale_f32 v5, vcc, v1, v2, s3
+// W64: encoding: [0x05,0x6a,0xfc,0xd6,0x01,0x05,0x0e,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_div_scale_f32 v5, vcc, v255, s2, s105
+// W64: encoding: [0x05,0x6a,0xfc,0xd6,0xff,0x05,0xa4,0x01]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_div_scale_f32 v5, vcc, s1, v255, exec_hi
+// W64: encoding: [0x05,0x6a,0xfc,0xd6,0x01,0xfe,0xff,0x01]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_div_scale_f32 v5, vcc, s105, s105, exec_lo
+// W64: encoding: [0x05,0x6a,0xfc,0xd6,0x69,0xd2,0xf8,0x01]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_div_scale_f32 v5, vcc, vcc_lo, ttmp15, v3
+// W64: encoding: [0x05,0x6a,0xfc,0xd6,0x6a,0xf6,0x0c,0x04]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_div_scale_f32 v5, vcc, vcc_hi, 0xaf123456, v255
+// W64: encoding: [0x05,0x6a,0xfc,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_div_scale_f32 v5, vcc, -ttmp15, -src_scc, -ttmp15
+// W64: encoding: [0x05,0x6a,0xfc,0xd6,0x7b,0xfa,0xed,0xe1]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_div_scale_f32 v5, vcc, m0, 0.5, m0
+// W64: encoding: [0x05,0x6a,0xfc,0xd6,0x7d,0xe0,0xf5,0x01]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_div_scale_f32 v5, vcc, exec_lo, -1, vcc_hi
+// W64: encoding: [0x05,0x6a,0xfc,0xd6,0x7e,0x82,0xad,0x01]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_div_scale_f32 v5, vcc, -exec_hi, null, -vcc_lo
+// W64: encoding: [0x05,0x6a,0xfc,0xd6,0x7f,0xf8,0xa8,0xa1]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_div_scale_f32 v5, vcc, null, exec_lo, neg(0xaf123456)
+// W64: encoding: [0x05,0x6a,0xfc,0xd6,0x7c,0xfc,0xfc,0x83,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_div_scale_f32 v5, vcc, -1, -exec_hi, -src_scc
+// W64: encoding: [0x05,0x6a,0xfc,0xd6,0xc1,0xfe,0xf4,0xc3]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_div_scale_f32 v5, vcc, 0.5, -m0, 0.5 mul:2
+// W64: encoding: [0x05,0x6a,0xfc,0xd6,0xf0,0xfa,0xc0,0x4b]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_div_scale_f32 v5, vcc, -src_scc, vcc_lo, -1 mul:4
+// W64: encoding: [0x05,0x6a,0xfc,0xd6,0xfd,0xd4,0x04,0x33]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_div_scale_f32 v255, vcc, neg(0xaf123456), -vcc_hi, null clamp div:2
+// W64: encoding: [0xff,0xea,0xfc,0xd6,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_div_scale_f64 v[5:6], vcc_lo, v[1:2], v[2:3], v[3:4]
+// W32: encoding: [0x05,0x6a,0xfd,0xd6,0x01,0x05,0x0e,0x04]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_div_scale_f64 v[5:6], vcc_lo, v[254:255], v[254:255], s[6:7]
+// W32: encoding: [0x05,0x6a,0xfd,0xd6,0xfe,0xfd,0x1b,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_div_scale_f64 v[5:6], vcc_lo, s[2:3], s[4:5], v[254:255]
+// W32: encoding: [0x05,0x6a,0xfd,0xd6,0x02,0x08,0xf8,0x07]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_div_scale_f64 v[5:6], vcc_lo, -s[104:105], s[104:105], -s[104:105]
+// W32: encoding: [0x05,0x6a,0xfd,0xd6,0x68,0xd0,0xa0,0xa1]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_div_scale_f64 v[5:6], vcc_lo, vcc, -ttmp[14:15], -ttmp[14:15]
+// W32: encoding: [0x05,0x6a,0xfd,0xd6,0x6a,0xf4,0xe8,0xc1]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_div_scale_f64 v[5:6], vcc_lo, -ttmp[14:15], 0xaf123456, null
+// W32: encoding: [0x05,0x6a,0xfd,0xd6,0x7a,0xfe,0xf1,0x21,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_div_scale_f64 v[5:6], vcc_lo, -exec, -src_scc, -exec
+// W32: encoding: [0x05,0x6a,0xfd,0xd6,0x7e,0xfa,0xf9,0xe1]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_div_scale_f64 v[5:6], vcc_lo, null, 0.5, vcc
+// W32: encoding: [0x05,0x6a,0xfd,0xd6,0x7c,0xe0,0xa9,0x01]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_div_scale_f64 v[5:6], vcc_lo, -1, -1, 0xaf123456
+// W32: encoding: [0x05,0x6a,0xfd,0xd6,0xc1,0x82,0xfd,0x03,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_div_scale_f64 v[5:6], vcc_lo, 0.5, null, -src_scc mul:2
+// W32: encoding: [0x05,0x6a,0xfd,0xd6,0xf0,0xf8,0xf4,0x8b]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_div_scale_f64 v[5:6], vcc_lo, -src_scc, -exec, 0.5 mul:4
+// W32: encoding: [0x05,0x6a,0xfd,0xd6,0xfd,0xfc,0xc0,0x73]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_div_scale_f64 v[254:255], vcc_lo, 0xaf123456, -vcc, -1 clamp div:2
+// W32: encoding: [0xfe,0xea,0xfd,0xd6,0xff,0xd4,0x04,0x5b,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_div_scale_f64 v[5:6], vcc, v[1:2], v[2:3], v[3:4]
+// W64: encoding: [0x05,0x6a,0xfd,0xd6,0x01,0x05,0x0e,0x04]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_div_scale_f64 v[5:6], vcc, v[254:255], v[254:255], s[6:7]
+// W64: encoding: [0x05,0x6a,0xfd,0xd6,0xfe,0xfd,0x1b,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_div_scale_f64 v[5:6], vcc, s[2:3], s[4:5], v[254:255]
+// W64: encoding: [0x05,0x6a,0xfd,0xd6,0x02,0x08,0xf8,0x07]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_div_scale_f64 v[5:6], vcc, -s[104:105], s[104:105], -s[104:105]
+// W64: encoding: [0x05,0x6a,0xfd,0xd6,0x68,0xd0,0xa0,0xa1]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_div_scale_f64 v[5:6], vcc, vcc, -ttmp[14:15], -ttmp[14:15]
+// W64: encoding: [0x05,0x6a,0xfd,0xd6,0x6a,0xf4,0xe8,0xc1]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_div_scale_f64 v[5:6], vcc, -ttmp[14:15], 0xaf123456, null
+// W64: encoding: [0x05,0x6a,0xfd,0xd6,0x7a,0xfe,0xf1,0x21,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_div_scale_f64 v[5:6], vcc, -exec, -src_scc, -exec
+// W64: encoding: [0x05,0x6a,0xfd,0xd6,0x7e,0xfa,0xf9,0xe1]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_div_scale_f64 v[5:6], vcc, null, 0.5, vcc
+// W64: encoding: [0x05,0x6a,0xfd,0xd6,0x7c,0xe0,0xa9,0x01]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_div_scale_f64 v[5:6], vcc, -1, -1, 0xaf123456
+// W64: encoding: [0x05,0x6a,0xfd,0xd6,0xc1,0x82,0xfd,0x03,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_div_scale_f64 v[5:6], vcc, 0.5, null, -src_scc mul:2
+// W64: encoding: [0x05,0x6a,0xfd,0xd6,0xf0,0xf8,0xf4,0x8b]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_div_scale_f64 v[5:6], vcc, -src_scc, -exec, 0.5 mul:4
+// W64: encoding: [0x05,0x6a,0xfd,0xd6,0xfd,0xfc,0xc0,0x73]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_div_scale_f64 v[254:255], vcc, 0xaf123456, -vcc, -1 clamp div:2
+// W64: encoding: [0xfe,0xea,0xfd,0xd6,0xff,0xd4,0x04,0x5b,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_dot2_bf16_bf16 v5, v1, v2, s3
+// GFX11: encoding: [0x05,0x00,0x67,0xd6,0x01,0x05,0x0e,0x00]
+
+v_dot2_bf16_bf16 v5, v255, v255, s105
+// GFX11: encoding: [0x05,0x00,0x67,0xd6,0xff,0xff,0xa7,0x01]
+
+v_dot2_bf16_bf16 v5, s1, s2, v3
+// GFX11: encoding: [0x05,0x00,0x67,0xd6,0x01,0x04,0x0c,0x04]
+
+v_dot2_bf16_bf16 v5, s105, s105, m0
+// GFX11: encoding: [0x05,0x00,0x67,0xd6,0x69,0xd2,0xf4,0x01]
+
+v_dot2_bf16_bf16 v5, vcc_lo, ttmp15, v255
+// GFX11: encoding: [0x05,0x00,0x67,0xd6,0x6a,0xf6,0xfc,0x07]
+
+v_dot2_bf16_bf16 v5, vcc_hi, 0xfe0b, vcc_hi
+// GFX11: encoding: [0x05,0x00,0x67,0xd6,0x6b,0xfe,0xad,0x01,0x0b,0xfe,0x00,0x00]
+
+v_dot2_bf16_bf16 v5, ttmp15, src_scc, ttmp15
+// GFX11: encoding: [0x05,0x00,0x67,0xd6,0x7b,0xfa,0xed,0x01]
+
+v_dot2_bf16_bf16 v5, |m0|, -1, -vcc_lo
+// GFX11: encoding: [0x05,0x01,0x67,0xd6,0x7d,0x82,0xa9,0x81]
+
+v_dot2_bf16_bf16 v5, -|exec_lo|, null, -|0xfe0b|
+// GFX11: encoding: [0x05,0x05,0x67,0xd6,0x7e,0xf8,0xfc,0xa3,0x0b,0xfe,0x00,0x00]
+
+v_dot2_bf16_bf16 v5, -|exec_hi|, -|exec_lo|, -|exec_lo|
+// GFX11: encoding: [0x05,0x07,0x67,0xd6,0x7f,0xfc,0xf8,0xe1]
+
+v_dot2_bf16_bf16 v5, null, -exec_hi, |src_scc|
+// GFX11: encoding: [0x05,0x04,0x67,0xd6,0x7c,0xfe,0xf4,0x43]
+
+v_dot2_bf16_bf16 v5, -1, -|m0|, -|exec_hi| op_sel:[0,0,0,0]
+// GFX11: encoding: [0x05,0x06,0x67,0xd6,0xc1,0xfa,0xfc,0xc1]
+
+v_dot2_bf16_bf16 v5, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0]
+// GFX11: encoding: [0x05,0x22,0x67,0xd6,0xfd,0xd4,0x04,0x23]
+
+v_dot2_bf16_bf16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1]
+// GFX11: encoding: [0xff,0x43,0x67,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00]
+
+v_dot2_bf16_bf16 v2, v0, 0x20004000, v2
+// GFX11: v_dot2_bf16_bf16 v2, v0, 0x20004000, v2 ; encoding: [0x02,0x00,0x67,0xd6,0x00,0xff,0x09,0x04,0x00,0x40,0x00,0x20]
+
+v_dot2_bf16_bf16 v2, 0x20004000, v0, v2
+// GFX11: v_dot2_bf16_bf16 v2, 0x20004000, v0, v2 ; encoding: [0x02,0x00,0x67,0xd6,0xff,0x00,0x0a,0x04,0x00,0x40,0x00,0x20]
+
+v_dot2_f16_f16 v5, v1, v2, s3
+// GFX11: encoding: [0x05,0x00,0x66,0xd6,0x01,0x05,0x0e,0x00]
+
+v_dot2_f16_f16 v5, v255, s2, s105
+// GFX11: encoding: [0x05,0x00,0x66,0xd6,0xff,0x05,0xa4,0x01]
+
+v_dot2_f16_f16 v5, s1, v255, exec_hi
+// GFX11: encoding: [0x05,0x00,0x66,0xd6,0x01,0xfe,0xff,0x01]
+
+v_dot2_f16_f16 v5, s105, s105, exec_lo
+// GFX11: encoding: [0x05,0x00,0x66,0xd6,0x69,0xd2,0xf8,0x01]
+
+v_dot2_f16_f16 v5, vcc_lo, ttmp15, v3
+// GFX11: encoding: [0x05,0x00,0x66,0xd6,0x6a,0xf6,0x0c,0x04]
+
+v_dot2_f16_f16 v5, vcc_hi, 0xfe0b, v255
+// GFX11: encoding: [0x05,0x00,0x66,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00]
+
+v_dot2_f16_f16 v5, -|ttmp15|, -|src_scc|, -|ttmp15|
+// GFX11: encoding: [0x05,0x07,0x66,0xd6,0x7b,0xfa,0xed,0xe1]
+
+v_dot2_f16_f16 v5, m0, 0.5, m0
+// GFX11: encoding: [0x05,0x00,0x66,0xd6,0x7d,0xe0,0xf5,0x01]
+
+v_dot2_f16_f16 v5, |exec_lo|, -1, vcc_hi
+// GFX11: encoding: [0x05,0x01,0x66,0xd6,0x7e,0x82,0xad,0x01]
+
+v_dot2_f16_f16 v5, -|exec_hi|, null, -|vcc_lo|
+// GFX11: encoding: [0x05,0x05,0x66,0xd6,0x7f,0xf8,0xa8,0xa1]
+
+v_dot2_f16_f16 v5, null, exec_lo, -|0xfe0b|
+// GFX11: encoding: [0x05,0x04,0x66,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00]
+
+v_dot2_f16_f16 v5, -1, -|exec_hi|, -|src_scc|
+// GFX11: encoding: [0x05,0x06,0x66,0xd6,0xc1,0xfe,0xf4,0xc3]
+
+v_dot2_f16_f16 v5, 0.5, -m0, 0.5 op_sel:[0,0,0,0]
+// GFX11: encoding: [0x05,0x00,0x66,0xd6,0xf0,0xfa,0xc0,0x43]
+
+v_dot2_f16_f16 v5, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0]
+// GFX11: encoding: [0x05,0x22,0x66,0xd6,0xfd,0xd4,0x04,0x23]
+
+v_dot2_f16_f16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1]
+// GFX11: encoding: [0xff,0x43,0x66,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00]
+
+v_dot2_f16_f16 v2, v0, 0x20004000, v2
+// GFX11: v_dot2_f16_f16 v2, v0, 0x20004000, v2 ; encoding: [0x02,0x00,0x66,0xd6,0x00,0xff,0x09,0x04,0x00,0x40,0x00,0x20]
+
+v_dot2_f16_f16 v2, 0x20004000, v0, v2
+// GFX11: v_dot2_f16_f16 v2, 0x20004000, v0, v2 ; encoding: [0x02,0x00,0x66,0xd6,0xff,0x00,0x0a,0x04,0x00,0x40,0x00,0x20]
+
+v_fma_dx9_zero_f32 v5, v1, v2, s3
+// GFX11: encoding: [0x05,0x00,0x09,0xd6,0x01,0x05,0x0e,0x00]
+
+v_fma_dx9_zero_f32 v5, v255, s2, s105
+// GFX11: encoding: [0x05,0x00,0x09,0xd6,0xff,0x05,0xa4,0x01]
+
+v_fma_dx9_zero_f32 v5, s1, v255, exec_hi
+// GFX11: encoding: [0x05,0x00,0x09,0xd6,0x01,0xfe,0xff,0x01]
+
+v_fma_dx9_zero_f32 v5, s105, s105, exec_lo
+// GFX11: encoding: [0x05,0x00,0x09,0xd6,0x69,0xd2,0xf8,0x01]
+
+v_fma_dx9_zero_f32 v5, vcc_lo, ttmp15, v3
+// GFX11: encoding: [0x05,0x00,0x09,0xd6,0x6a,0xf6,0x0c,0x04]
+
+v_fma_dx9_zero_f32 v5, vcc_hi, 0xaf123456, v255
+// GFX11: encoding: [0x05,0x00,0x09,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf]
+
+v_fma_dx9_zero_f32 v5, -|ttmp15|, -|src_scc|, -|ttmp15|
+// GFX11: encoding: [0x05,0x07,0x09,0xd6,0x7b,0xfa,0xed,0xe1]
+
+v_fma_dx9_zero_f32 v5, m0, 0.5, m0
+// GFX11: encoding: [0x05,0x00,0x09,0xd6,0x7d,0xe0,0xf5,0x01]
+
+v_fma_dx9_zero_f32 v5, |exec_lo|, -1, vcc_hi
+// GFX11: encoding: [0x05,0x01,0x09,0xd6,0x7e,0x82,0xad,0x01]
+
+v_fma_dx9_zero_f32 v5, -|exec_hi|, null, -|vcc_lo|
+// GFX11: encoding: [0x05,0x05,0x09,0xd6,0x7f,0xf8,0xa8,0xa1]
+
+v_fma_dx9_zero_f32 v5, null, exec_lo, -|0xaf123456|
+// GFX11: encoding: [0x05,0x04,0x09,0xd6,0x7c,0xfc,0xfc,0x83,0x56,0x34,0x12,0xaf]
+
+v_fma_dx9_zero_f32 v5, -1, -|exec_hi|, -|src_scc|
+// GFX11: encoding: [0x05,0x06,0x09,0xd6,0xc1,0xfe,0xf4,0xc3]
+
+v_fma_dx9_zero_f32 v5, 0.5, -m0, 0.5 mul:2
+// GFX11: encoding: [0x05,0x00,0x09,0xd6,0xf0,0xfa,0xc0,0x4b]
+
+v_fma_dx9_zero_f32 v5, -src_scc, |vcc_lo|, -1 mul:4
+// GFX11: encoding: [0x05,0x02,0x09,0xd6,0xfd,0xd4,0x04,0x33]
+
+v_fma_dx9_zero_f32 v255, -|0xaf123456|, -|vcc_hi|, null clamp div:2
+// GFX11: encoding: [0xff,0x83,0x09,0xd6,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf]
+
+v_fma_f16 v5, v1, v2, s3
+// GFX11: encoding: [0x05,0x00,0x48,0xd6,0x01,0x05,0x0e,0x00]
+
+v_fma_f16 v5, v255, s2, s105
+// GFX11: encoding: [0x05,0x00,0x48,0xd6,0xff,0x05,0xa4,0x01]
+
+v_fma_f16 v5, s1, v255, exec_hi
+// GFX11: encoding: [0x05,0x00,0x48,0xd6,0x01,0xfe,0xff,0x01]
+
+v_fma_f16 v5, s105, s105, exec_lo
+// GFX11: encoding: [0x05,0x00,0x48,0xd6,0x69,0xd2,0xf8,0x01]
+
+v_fma_f16 v5, vcc_lo, ttmp15, v3
+// GFX11: encoding: [0x05,0x00,0x48,0xd6,0x6a,0xf6,0x0c,0x04]
+
+v_fma_f16 v5, vcc_hi, 0xfe0b, v255
+// GFX11: encoding: [0x05,0x00,0x48,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00]
+
+v_fma_f16 v5, -|ttmp15|, -|src_scc|, -|ttmp15|
+// GFX11: encoding: [0x05,0x07,0x48,0xd6,0x7b,0xfa,0xed,0xe1]
+
+v_fma_f16 v5, m0, 0.5, m0
+// GFX11: encoding: [0x05,0x00,0x48,0xd6,0x7d,0xe0,0xf5,0x01]
+
+v_fma_f16 v5, |exec_lo|, -1, vcc_hi
+// GFX11: encoding: [0x05,0x01,0x48,0xd6,0x7e,0x82,0xad,0x01]
+
+v_fma_f16 v5, -|exec_hi|, null, -|vcc_lo| op_sel:[1,1,1,1]
+// GFX11: encoding: [0x05,0x7d,0x48,0xd6,0x7f,0xf8,0xa8,0xa1]
+
+v_fma_f16 v5, null, exec_lo, -|0xfe0b| op_sel:[0,0,0,0]
+// GFX11: encoding: [0x05,0x04,0x48,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00]
+
+v_fma_f16 v5, -1, -|exec_hi|, -|src_scc| op_sel:[1,0,0,0]
+// GFX11: encoding: [0x05,0x0e,0x48,0xd6,0xc1,0xfe,0xf4,0xc3]
+
+v_fma_f16 v5, 0.5, -m0, 0.5 op_sel:[0,1,0,0]
+// GFX11: encoding: [0x05,0x10,0x48,0xd6,0xf0,0xfa,0xc0,0x43]
+
+v_fma_f16 v5, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0]
+// GFX11: encoding: [0x05,0x22,0x48,0xd6,0xfd,0xd4,0x04,0x23]
+
+v_fma_f16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp
+// GFX11: encoding: [0xff,0xc3,0x48,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00]
+
+v_fma_f16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp div:2
+// GFX11: encoding: [0xff,0xc3,0x48,0xd6,0xff,0xd6,0xf0,0x79,0x0b,0xfe,0x00,0x00]
+
+v_fma_f32 v5, v1, v2, s3
+// GFX11: encoding: [0x05,0x00,0x13,0xd6,0x01,0x05,0x0e,0x00]
+
+v_fma_f32 v5, v255, s2, s105
+// GFX11: encoding: [0x05,0x00,0x13,0xd6,0xff,0x05,0xa4,0x01]
+
+v_fma_f32 v5, s1, v255, exec_hi
+// GFX11: encoding: [0x05,0x00,0x13,0xd6,0x01,0xfe,0xff,0x01]
+
+v_fma_f32 v5, s105, s105, exec_lo
+// GFX11: encoding: [0x05,0x00,0x13,0xd6,0x69,0xd2,0xf8,0x01]
+
+v_fma_f32 v5, vcc_lo, ttmp15, v3
+// GFX11: encoding: [0x05,0x00,0x13,0xd6,0x6a,0xf6,0x0c,0x04]
+
+v_fma_f32 v5, vcc_hi, 0xaf123456, v255
+// GFX11: encoding: [0x05,0x00,0x13,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf]
+
+v_fma_f32 v5, -|ttmp15|, -|src_scc|, -|ttmp15|
+// GFX11: encoding: [0x05,0x07,0x13,0xd6,0x7b,0xfa,0xed,0xe1]
+
+v_fma_f32 v5, m0, 0.5, m0
+// GFX11: encoding: [0x05,0x00,0x13,0xd6,0x7d,0xe0,0xf5,0x01]
+
+v_fma_f32 v5, |exec_lo|, -1, vcc_hi
+// GFX11: encoding: [0x05,0x01,0x13,0xd6,0x7e,0x82,0xad,0x01]
+
+v_fma_f32 v5, -|exec_hi|, null, -|vcc_lo|
+// GFX11: encoding: [0x05,0x05,0x13,0xd6,0x7f,0xf8,0xa8,0xa1]
+
+v_fma_f32 v5, null, exec_lo, -|0xaf123456|
+// GFX11: encoding: [0x05,0x04,0x13,0xd6,0x7c,0xfc,0xfc,0x83,0x56,0x34,0x12,0xaf]
+
+v_fma_f32 v5, -1, -|exec_hi|, -|src_scc|
+// GFX11: encoding: [0x05,0x06,0x13,0xd6,0xc1,0xfe,0xf4,0xc3]
+
+v_fma_f32 v5, 0.5, -m0, 0.5 mul:2
+// GFX11: encoding: [0x05,0x00,0x13,0xd6,0xf0,0xfa,0xc0,0x4b]
+
+v_fma_f32 v5, -src_scc, |vcc_lo|, -1 mul:4
+// GFX11: encoding: [0x05,0x02,0x13,0xd6,0xfd,0xd4,0x04,0x33]
+
+v_fma_f32 v255, -|0xaf123456|, -|vcc_hi|, null clamp div:2
+// GFX11: encoding: [0xff,0x83,0x13,0xd6,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf]
+
+v_fma_f64 v[5:6], v[1:2], v[2:3], v[3:4]
+// GFX11: encoding: [0x05,0x00,0x14,0xd6,0x01,0x05,0x0e,0x04]
+
+v_fma_f64 v[5:6], v[254:255], v[254:255], s[6:7]
+// GFX11: encoding: [0x05,0x00,0x14,0xd6,0xfe,0xfd,0x1b,0x00]
+
+v_fma_f64 v[5:6], s[2:3], s[4:5], v[254:255]
+// GFX11: encoding: [0x05,0x00,0x14,0xd6,0x02,0x08,0xf8,0x07]
+
+v_fma_f64 v[5:6], -|s[104:105]|, s[104:105], -|s[104:105]|
+// GFX11: encoding: [0x05,0x05,0x14,0xd6,0x68,0xd0,0xa0,0xa1]
+
+v_fma_f64 v[5:6], vcc, -|ttmp[14:15]|, -|ttmp[14:15]|
+// GFX11: encoding: [0x05,0x06,0x14,0xd6,0x6a,0xf4,0xe8,0xc1]
+
+v_fma_f64 v[5:6], -|ttmp[14:15]|, 0xaf123456, null
+// GFX11: encoding: [0x05,0x01,0x14,0xd6,0x7a,0xfe,0xf1,0x21,0x56,0x34,0x12,0xaf]
+
+v_fma_f64 v[5:6], -|exec|, -|src_scc|, -|exec|
+// GFX11: encoding: [0x05,0x07,0x14,0xd6,0x7e,0xfa,0xf9,0xe1]
+
+v_fma_f64 v[5:6], null, 0.5, vcc
+// GFX11: encoding: [0x05,0x00,0x14,0xd6,0x7c,0xe0,0xa9,0x01]
+
+v_fma_f64 v[5:6], -1, -1, 0xaf123456
+// GFX11: encoding: [0x05,0x00,0x14,0xd6,0xc1,0x82,0xfd,0x03,0x56,0x34,0x12,0xaf]
+
+v_fma_f64 v[5:6], 0.5, null, -|src_scc| mul:2
+// GFX11: encoding: [0x05,0x04,0x14,0xd6,0xf0,0xf8,0xf4,0x8b]
+
+v_fma_f64 v[5:6], -|src_scc|, -|exec|, 0.5 mul:4
+// GFX11: encoding: [0x05,0x03,0x14,0xd6,0xfd,0xfc,0xc0,0x73]
+
+v_fma_f64 v[254:255], 0xaf123456, -|vcc|, -1 clamp div:2
+// GFX11: encoding: [0xfe,0x82,0x14,0xd6,0xff,0xd4,0x04,0x5b,0x56,0x34,0x12,0xaf]
+
+v_fma_legacy_f32 v5, v1, v2, s3
+// GFX11: encoding: [0x05,0x00,0x09,0xd6,0x01,0x05,0x0e,0x00]
+
+v_fma_legacy_f32 v5, v255, s2, s105
+// GFX11: encoding: [0x05,0x00,0x09,0xd6,0xff,0x05,0xa4,0x01]
+
+v_fma_legacy_f32 v5, s1, v255, exec_hi
+// GFX11: encoding: [0x05,0x00,0x09,0xd6,0x01,0xfe,0xff,0x01]
+
+v_fma_legacy_f32 v5, s105, s105, exec_lo
+// GFX11: encoding: [0x05,0x00,0x09,0xd6,0x69,0xd2,0xf8,0x01]
+
+v_fma_legacy_f32 v5, vcc_lo, ttmp15, v3
+// GFX11: encoding: [0x05,0x00,0x09,0xd6,0x6a,0xf6,0x0c,0x04]
+
+v_fma_legacy_f32 v5, vcc_hi, 0xaf123456, v255
+// GFX11: encoding: [0x05,0x00,0x09,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf]
+
+v_fma_legacy_f32 v5, -|ttmp15|, -|src_scc|, -|ttmp15|
+// GFX11: encoding: [0x05,0x07,0x09,0xd6,0x7b,0xfa,0xed,0xe1]
+
+v_fma_legacy_f32 v5, m0, 0.5, m0
+// GFX11: encoding: [0x05,0x00,0x09,0xd6,0x7d,0xe0,0xf5,0x01]
+
+v_fma_legacy_f32 v5, |exec_lo|, -1, vcc_hi
+// GFX11: encoding: [0x05,0x01,0x09,0xd6,0x7e,0x82,0xad,0x01]
+
+v_fma_legacy_f32 v5, -|exec_hi|, null, -|vcc_lo|
+// GFX11: encoding: [0x05,0x05,0x09,0xd6,0x7f,0xf8,0xa8,0xa1]
+
+v_fma_legacy_f32 v5, null, exec_lo, -|0xaf123456|
+// GFX11: encoding: [0x05,0x04,0x09,0xd6,0x7c,0xfc,0xfc,0x83,0x56,0x34,0x12,0xaf]
+
+v_fma_legacy_f32 v5, -1, -|exec_hi|, -|src_scc|
+// GFX11: encoding: [0x05,0x06,0x09,0xd6,0xc1,0xfe,0xf4,0xc3]
+
+v_fma_legacy_f32 v5, 0.5, -m0, 0.5 mul:2
+// GFX11: encoding: [0x05,0x00,0x09,0xd6,0xf0,0xfa,0xc0,0x4b]
+
+v_fma_legacy_f32 v5, -src_scc, |vcc_lo|, -1 mul:4
+// GFX11: encoding: [0x05,0x02,0x09,0xd6,0xfd,0xd4,0x04,0x33]
+
+v_fma_legacy_f32 v255, -|0xaf123456|, -|vcc_hi|, null clamp div:2
+// GFX11: encoding: [0xff,0x83,0x09,0xd6,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf]
+
+v_ldexp_f32 v5, v1, v2
+// GFX11: encoding: [0x05,0x00,0x1c,0xd7,0x01,0x05,0x02,0x00]
+
+v_ldexp_f32 v5, v255, v255
+// GFX11: encoding: [0x05,0x00,0x1c,0xd7,0xff,0xff,0x03,0x00]
+
+v_ldexp_f32 v5, s1, s2
+// GFX11: encoding: [0x05,0x00,0x1c,0xd7,0x01,0x04,0x00,0x00]
+
+v_ldexp_f32 v5, s105, s105
+// GFX11: encoding: [0x05,0x00,0x1c,0xd7,0x69,0xd2,0x00,0x00]
+
+v_ldexp_f32 v5, vcc_lo, ttmp15
+// GFX11: encoding: [0x05,0x00,0x1c,0xd7,0x6a,0xf6,0x00,0x00]
+
+v_ldexp_f32 v5, vcc_hi, 0xaf123456
+// GFX11: encoding: [0x05,0x00,0x1c,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+
+v_ldexp_f32 v5, ttmp15, src_scc
+// GFX11: encoding: [0x05,0x00,0x1c,0xd7,0x7b,0xfa,0x01,0x00]
+
+v_ldexp_f32 v5, m0, 0.5
+// GFX11: encoding: [0x05,0x00,0x1c,0xd7,0x7d,0xe0,0x01,0x00]
+
+v_ldexp_f32 v5, exec_lo, -1
+// GFX11: encoding: [0x05,0x00,0x1c,0xd7,0x7e,0x82,0x01,0x00]
+
+v_ldexp_f32 v5, exec_hi, null
+// GFX11: encoding: [0x05,0x00,0x1c,0xd7,0x7f,0xf8,0x00,0x00]
+
+v_ldexp_f32 v5, null, exec_lo
+// GFX11: encoding: [0x05,0x00,0x1c,0xd7,0x7c,0xfc,0x00,0x00]
+
+v_ldexp_f32 v5, -1, exec_hi
+// GFX11: encoding: [0x05,0x00,0x1c,0xd7,0xc1,0xfe,0x00,0x00]
+
+v_ldexp_f32 v5, 0.5, m0 mul:2
+// GFX11: encoding: [0x05,0x00,0x1c,0xd7,0xf0,0xfa,0x00,0x08]
+
+v_ldexp_f32 v5, src_scc, vcc_lo mul:4
+// GFX11: encoding: [0x05,0x00,0x1c,0xd7,0xfd,0xd4,0x00,0x10]
+
+v_ldexp_f32 v255, -|0xaf123456|, vcc_hi clamp div:2
+// GFX11: encoding: [0xff,0x81,0x1c,0xd7,0xff,0xd6,0x00,0x38,0x56,0x34,0x12,0xaf]
+
+v_ldexp_f64 v[5:6], v[1:2], v2
+// GFX11: encoding: [0x05,0x00,0x2b,0xd7,0x01,0x05,0x02,0x00]
+
+v_ldexp_f64 v[5:6], v[1:2], v255
+// GFX11: encoding: [0x05,0x00,0x2b,0xd7,0x01,0xff,0x03,0x00]
+
+v_ldexp_f64 v[5:6], v[1:2], s2
+// GFX11: encoding: [0x05,0x00,0x2b,0xd7,0x01,0x05,0x00,0x00]
+
+v_ldexp_f64 v[5:6], v[1:2], s105
+// GFX11: encoding: [0x05,0x00,0x2b,0xd7,0x01,0xd3,0x00,0x00]
+
+v_ldexp_f64 v[5:6], v[254:255], ttmp15
+// GFX11: encoding: [0x05,0x00,0x2b,0xd7,0xfe,0xf7,0x00,0x00]
+
+v_ldexp_f64 v[5:6], s[2:3], vcc_hi
+// GFX11: encoding: [0x05,0x00,0x2b,0xd7,0x02,0xd6,0x00,0x00]
+
+v_ldexp_f64 v[5:6], s[104:105], vcc_lo
+// GFX11: encoding: [0x05,0x00,0x2b,0xd7,0x68,0xd4,0x00,0x00]
+
+v_ldexp_f64 v[5:6], vcc, m0
+// GFX11: encoding: [0x05,0x00,0x2b,0xd7,0x6a,0xfa,0x00,0x00]
+
+v_ldexp_f64 v[5:6], ttmp[14:15], exec_hi
+// GFX11: encoding: [0x05,0x00,0x2b,0xd7,0x7a,0xfe,0x00,0x00]
+
+v_ldexp_f64 v[5:6], exec, exec_lo
+// GFX11: encoding: [0x05,0x00,0x2b,0xd7,0x7e,0xfc,0x00,0x00]
+
+v_ldexp_f64 v[5:6], null, null
+// GFX11: encoding: [0x05,0x00,0x2b,0xd7,0x7c,0xf8,0x00,0x00]
+
+v_ldexp_f64 v[5:6], -1, -1
+// GFX11: encoding: [0x05,0x00,0x2b,0xd7,0xc1,0x82,0x01,0x00]
+
+v_ldexp_f64 v[5:6], 0.5, 0.5 mul:2
+// GFX11: encoding: [0x05,0x00,0x2b,0xd7,0xf0,0xe0,0x01,0x08]
+
+v_ldexp_f64 v[5:6], -|src_scc|, src_scc mul:4
+// GFX11: encoding: [0x05,0x01,0x2b,0xd7,0xfd,0xfa,0x01,0x30]
+
+v_ldexp_f64 v[254:255], 0xaf123456, 0xaf123456 clamp div:2
+// GFX11: encoding: [0xfe,0x80,0x2b,0xd7,0xff,0xfe,0x01,0x18,0x56,0x34,0x12,0xaf]
+
+v_lerp_u8 v5, v1, v2, s3
+// GFX11: encoding: [0x05,0x00,0x15,0xd6,0x01,0x05,0x0e,0x00]
+
+v_lerp_u8 v5, v255, s2, s105
+// GFX11: encoding: [0x05,0x00,0x15,0xd6,0xff,0x05,0xa4,0x01]
+
+v_lerp_u8 v5, s1, v255, exec_hi
+// GFX11: encoding: [0x05,0x00,0x15,0xd6,0x01,0xfe,0xff,0x01]
+
+v_lerp_u8 v5, s105, s105, exec_lo
+// GFX11: encoding: [0x05,0x00,0x15,0xd6,0x69,0xd2,0xf8,0x01]
+
+v_lerp_u8 v5, vcc_lo, ttmp15, v3
+// GFX11: encoding: [0x05,0x00,0x15,0xd6,0x6a,0xf6,0x0c,0x04]
+
+v_lerp_u8 v5, vcc_hi, 0xaf123456, v255
+// GFX11: encoding: [0x05,0x00,0x15,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf]
+
+v_lerp_u8 v5, ttmp15, src_scc, ttmp15
+// GFX11: encoding: [0x05,0x00,0x15,0xd6,0x7b,0xfa,0xed,0x01]
+
+v_lerp_u8 v5, m0, 0.5, m0
+// GFX11: encoding: [0x05,0x00,0x15,0xd6,0x7d,0xe0,0xf5,0x01]
+
+v_lerp_u8 v5, exec_lo, -1, vcc_hi
+// GFX11: encoding: [0x05,0x00,0x15,0xd6,0x7e,0x82,0xad,0x01]
+
+v_lerp_u8 v5, exec_hi, null, vcc_lo
+// GFX11: encoding: [0x05,0x00,0x15,0xd6,0x7f,0xf8,0xa8,0x01]
+
+v_lerp_u8 v5, null, exec_lo, 0xaf123456
+// GFX11: encoding: [0x05,0x00,0x15,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf]
+
+v_lerp_u8 v5, -1, exec_hi, src_scc
+// GFX11: encoding: [0x05,0x00,0x15,0xd6,0xc1,0xfe,0xf4,0x03]
+
+v_lerp_u8 v5, 0.5, m0, 0.5
+// GFX11: encoding: [0x05,0x00,0x15,0xd6,0xf0,0xfa,0xc0,0x03]
+
+v_lerp_u8 v5, src_scc, vcc_lo, -1
+// GFX11: encoding: [0x05,0x00,0x15,0xd6,0xfd,0xd4,0x04,0x03]
+
+v_lerp_u8 v255, 0xaf123456, vcc_hi, null
+// GFX11: encoding: [0xff,0x00,0x15,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf]
+
+v_lshl_add_u32 v5, v1, v2, s3
+// GFX11: encoding: [0x05,0x00,0x46,0xd6,0x01,0x05,0x0e,0x00]
+
+v_lshl_add_u32 v5, v255, s2, s105
+// GFX11: encoding: [0x05,0x00,0x46,0xd6,0xff,0x05,0xa4,0x01]
+
+v_lshl_add_u32 v5, s1, v255, exec_hi
+// GFX11: encoding: [0x05,0x00,0x46,0xd6,0x01,0xfe,0xff,0x01]
+
+v_lshl_add_u32 v5, s105, s105, exec_lo
+// GFX11: encoding: [0x05,0x00,0x46,0xd6,0x69,0xd2,0xf8,0x01]
+
+v_lshl_add_u32 v5, vcc_lo, ttmp15, v3
+// GFX11: encoding: [0x05,0x00,0x46,0xd6,0x6a,0xf6,0x0c,0x04]
+
+v_lshl_add_u32 v5, vcc_hi, 0xaf123456, v255
+// GFX11: encoding: [0x05,0x00,0x46,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf]
+
+v_lshl_add_u32 v5, ttmp15, src_scc, ttmp15
+// GFX11: encoding: [0x05,0x00,0x46,0xd6,0x7b,0xfa,0xed,0x01]
+
+v_lshl_add_u32 v5, m0, 0.5, m0
+// GFX11: encoding: [0x05,0x00,0x46,0xd6,0x7d,0xe0,0xf5,0x01]
+
+v_lshl_add_u32 v5, exec_lo, -1, vcc_hi
+// GFX11: encoding: [0x05,0x00,0x46,0xd6,0x7e,0x82,0xad,0x01]
+
+v_lshl_add_u32 v5, exec_hi, null, vcc_lo
+// GFX11: encoding: [0x05,0x00,0x46,0xd6,0x7f,0xf8,0xa8,0x01]
+
+v_lshl_add_u32 v5, null, exec_lo, 0xaf123456
+// GFX11: encoding: [0x05,0x00,0x46,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf]
+
+v_lshl_add_u32 v5, -1, exec_hi, src_scc
+// GFX11: encoding: [0x05,0x00,0x46,0xd6,0xc1,0xfe,0xf4,0x03]
+
+v_lshl_add_u32 v5, 0.5, m0, 0.5
+// GFX11: encoding: [0x05,0x00,0x46,0xd6,0xf0,0xfa,0xc0,0x03]
+
+v_lshl_add_u32 v5, src_scc, vcc_lo, -1
+// GFX11: encoding: [0x05,0x00,0x46,0xd6,0xfd,0xd4,0x04,0x03]
+
+v_lshl_add_u32 v255, 0xaf123456, vcc_hi, null
+// GFX11: encoding: [0xff,0x00,0x46,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf]
+
+v_lshl_or_b32 v5, v1, v2, s3
+// GFX11: encoding: [0x05,0x00,0x56,0xd6,0x01,0x05,0x0e,0x00]
+
+v_lshl_or_b32 v5, v255, s2, s105
+// GFX11: encoding: [0x05,0x00,0x56,0xd6,0xff,0x05,0xa4,0x01]
+
+v_lshl_or_b32 v5, s1, v255, exec_hi
+// GFX11: encoding: [0x05,0x00,0x56,0xd6,0x01,0xfe,0xff,0x01]
+
+v_lshl_or_b32 v5, s105, s105, exec_lo
+// GFX11: encoding: [0x05,0x00,0x56,0xd6,0x69,0xd2,0xf8,0x01]
+
+v_lshl_or_b32 v5, vcc_lo, ttmp15, v3
+// GFX11: encoding: [0x05,0x00,0x56,0xd6,0x6a,0xf6,0x0c,0x04]
+
+v_lshl_or_b32 v5, vcc_hi, 0xaf123456, v255
+// GFX11: encoding: [0x05,0x00,0x56,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf]
+
+v_lshl_or_b32 v5, ttmp15, src_scc, ttmp15
+// GFX11: encoding: [0x05,0x00,0x56,0xd6,0x7b,0xfa,0xed,0x01]
+
+v_lshl_or_b32 v5, m0, 0.5, m0
+// GFX11: encoding: [0x05,0x00,0x56,0xd6,0x7d,0xe0,0xf5,0x01]
+
+v_lshl_or_b32 v5, exec_lo, -1, vcc_hi
+// GFX11: encoding: [0x05,0x00,0x56,0xd6,0x7e,0x82,0xad,0x01]
+
+v_lshl_or_b32 v5, exec_hi, null, vcc_lo
+// GFX11: encoding: [0x05,0x00,0x56,0xd6,0x7f,0xf8,0xa8,0x01]
+
+v_lshl_or_b32 v5, null, exec_lo, 0xaf123456
+// GFX11: encoding: [0x05,0x00,0x56,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf]
+
+v_lshl_or_b32 v5, -1, exec_hi, src_scc
+// GFX11: encoding: [0x05,0x00,0x56,0xd6,0xc1,0xfe,0xf4,0x03]
+
+v_lshl_or_b32 v5, 0.5, m0, 0.5
+// GFX11: encoding: [0x05,0x00,0x56,0xd6,0xf0,0xfa,0xc0,0x03]
+
+v_lshl_or_b32 v5, src_scc, vcc_lo, -1
+// GFX11: encoding: [0x05,0x00,0x56,0xd6,0xfd,0xd4,0x04,0x03]
+
+v_lshl_or_b32 v255, 0xaf123456, vcc_hi, null
+// GFX11: encoding: [0xff,0x00,0x56,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf]
+
+v_lshlrev_b16 v5, v1, v2
+// GFX11: encoding: [0x05,0x00,0x38,0xd7,0x01,0x05,0x02,0x00]
+
+v_lshlrev_b16 v5, v255, v255
+// GFX11: encoding: [0x05,0x00,0x38,0xd7,0xff,0xff,0x03,0x00]
+
+v_lshlrev_b16 v5, s1, s2
+// GFX11: encoding: [0x05,0x00,0x38,0xd7,0x01,0x04,0x00,0x00]
+
+v_lshlrev_b16 v5, s105, s105
+// GFX11: encoding: [0x05,0x00,0x38,0xd7,0x69,0xd2,0x00,0x00]
+
+v_lshlrev_b16 v5, vcc_lo, ttmp15
+// GFX11: encoding: [0x05,0x00,0x38,0xd7,0x6a,0xf6,0x00,0x00]
+
+v_lshlrev_b16 v5, vcc_hi, 0xfe0b
+// GFX11: encoding: [0x05,0x00,0x38,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+
+v_lshlrev_b16 v5, ttmp15, src_scc
+// GFX11: encoding: [0x05,0x00,0x38,0xd7,0x7b,0xfa,0x01,0x00]
+
+v_lshlrev_b16 v5, m0, 0.5
+// GFX11: encoding: [0x05,0x00,0x38,0xd7,0x7d,0xe0,0x01,0x00]
+
+v_lshlrev_b16 v5, exec_lo, -1
+// GFX11: encoding: [0x05,0x00,0x38,0xd7,0x7e,0x82,0x01,0x00]
+
+v_lshlrev_b16 v5, exec_hi, null
+// GFX11: encoding: [0x05,0x00,0x38,0xd7,0x7f,0xf8,0x00,0x00]
+
+v_lshlrev_b16 v5, null, exec_lo
+// GFX11: encoding: [0x05,0x00,0x38,0xd7,0x7c,0xfc,0x00,0x00]
+
+v_lshlrev_b16 v5, -1, exec_hi
+// GFX11: encoding: [0x05,0x00,0x38,0xd7,0xc1,0xfe,0x00,0x00]
+
+v_lshlrev_b16 v5, 0.5, m0
+// GFX11: encoding: [0x05,0x00,0x38,0xd7,0xf0,0xfa,0x00,0x00]
+
+v_lshlrev_b16 v5, src_scc, vcc_lo
+// GFX11: encoding: [0x05,0x00,0x38,0xd7,0xfd,0xd4,0x00,0x00]
+
+v_lshlrev_b16 v255, 0xfe0b, vcc_hi
+// GFX11: encoding:
[0xff,0x00,0x38,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] + +v_lshlrev_b64 v[5:6], v1, vcc +// GFX11: encoding: [0x05,0x00,0x3c,0xd7,0x01,0xd5,0x00,0x00] + +v_lshlrev_b64 v[5:6], v255, exec +// GFX11: encoding: [0x05,0x00,0x3c,0xd7,0xff,0xfd,0x00,0x00] + +v_lshlrev_b64 v[5:6], exec_lo, v[2:3] +// GFX11: encoding: [0x05,0x00,0x3c,0xd7,0x7e,0x04,0x02,0x00] + +v_lshlrev_b64 v[5:6], exec_hi, v[254:255] +// GFX11: encoding: [0x05,0x00,0x3c,0xd7,0x7f,0xfc,0x03,0x00] + +v_lshlrev_b64 v[5:6], null, null +// GFX11: encoding: [0x05,0x00,0x3c,0xd7,0x7c,0xf8,0x00,0x00] + +v_lshlrev_b64 v[5:6], -1, -1 +// GFX11: encoding: [0x05,0x00,0x3c,0xd7,0xc1,0x82,0x01,0x00] + +v_lshlrev_b64 v[5:6], 0.5, 0xaf123456 +// GFX11: encoding: [0x05,0x00,0x3c,0xd7,0xf0,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] + +v_lshlrev_b64 v[5:6], src_scc, src_scc +// GFX11: encoding: [0x05,0x00,0x3c,0xd7,0xfd,0xfa,0x01,0x00] + +v_lshlrev_b64 v[254:255], 0xaf123456, 0.5 +// GFX11: encoding: [0xfe,0x00,0x3c,0xd7,0xff,0xe0,0x01,0x00,0x56,0x34,0x12,0xaf] + +v_lshrrev_b16 v5, v1, v2 +// GFX11: encoding: [0x05,0x00,0x39,0xd7,0x01,0x05,0x02,0x00] + +v_lshrrev_b16 v5, v255, v255 +// GFX11: encoding: [0x05,0x00,0x39,0xd7,0xff,0xff,0x03,0x00] + +v_lshrrev_b16 v5, s1, s2 +// GFX11: encoding: [0x05,0x00,0x39,0xd7,0x01,0x04,0x00,0x00] + +v_lshrrev_b16 v5, s105, s105 +// GFX11: encoding: [0x05,0x00,0x39,0xd7,0x69,0xd2,0x00,0x00] + +v_lshrrev_b16 v5, vcc_lo, ttmp15 +// GFX11: encoding: [0x05,0x00,0x39,0xd7,0x6a,0xf6,0x00,0x00] + +v_lshrrev_b16 v5, vcc_hi, 0xfe0b +// GFX11: encoding: [0x05,0x00,0x39,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] + +v_lshrrev_b16 v5, ttmp15, src_scc +// GFX11: encoding: [0x05,0x00,0x39,0xd7,0x7b,0xfa,0x01,0x00] + +v_lshrrev_b16 v5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x39,0xd7,0x7d,0xe0,0x01,0x00] + +v_lshrrev_b16 v5, exec_lo, -1 +// GFX11: encoding: [0x05,0x00,0x39,0xd7,0x7e,0x82,0x01,0x00] + +v_lshrrev_b16 v5, exec_hi, null +// GFX11: encoding: [0x05,0x00,0x39,0xd7,0x7f,0xf8,0x00,0x00] + +v_lshrrev_b16 v5, null, exec_lo +// GFX11: encoding: [0x05,0x00,0x39,0xd7,0x7c,0xfc,0x00,0x00] + +v_lshrrev_b16 v5, -1, exec_hi +// GFX11: encoding: [0x05,0x00,0x39,0xd7,0xc1,0xfe,0x00,0x00] + +v_lshrrev_b16 v5, 0.5, m0 +// GFX11: encoding: [0x05,0x00,0x39,0xd7,0xf0,0xfa,0x00,0x00] + +v_lshrrev_b16 v5, src_scc, vcc_lo +// GFX11: encoding: [0x05,0x00,0x39,0xd7,0xfd,0xd4,0x00,0x00] + +v_lshrrev_b16 v255, 0xfe0b, vcc_hi +// GFX11: encoding: [0xff,0x00,0x39,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] + +v_lshrrev_b64 v[5:6], v1, vcc +// GFX11: encoding: [0x05,0x00,0x3d,0xd7,0x01,0xd5,0x00,0x00] + +v_lshrrev_b64 v[5:6], v255, exec +// GFX11: encoding: [0x05,0x00,0x3d,0xd7,0xff,0xfd,0x00,0x00] + +v_lshrrev_b64 v[5:6], exec_lo, v[2:3] +// GFX11: encoding: [0x05,0x00,0x3d,0xd7,0x7e,0x04,0x02,0x00] + +v_lshrrev_b64 v[5:6], exec_hi, v[254:255] +// GFX11: encoding: [0x05,0x00,0x3d,0xd7,0x7f,0xfc,0x03,0x00] + +v_lshrrev_b64 v[5:6], null, null +// GFX11: encoding: [0x05,0x00,0x3d,0xd7,0x7c,0xf8,0x00,0x00] + +v_lshrrev_b64 v[5:6], -1, -1 +// GFX11: encoding: [0x05,0x00,0x3d,0xd7,0xc1,0x82,0x01,0x00] + +v_lshrrev_b64 v[5:6], 0.5, 0xaf123456 +// GFX11: encoding: [0x05,0x00,0x3d,0xd7,0xf0,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] + +v_lshrrev_b64 v[5:6], src_scc, src_scc +// GFX11: encoding: [0x05,0x00,0x3d,0xd7,0xfd,0xfa,0x01,0x00] + +v_lshrrev_b64 v[254:255], 0xaf123456, 0.5 +// GFX11: encoding: [0xfe,0x00,0x3d,0xd7,0xff,0xe0,0x01,0x00,0x56,0x34,0x12,0xaf] + +v_mad_i16 v5, v1, v2, s3 +// GFX11: encoding: [0x05,0x00,0x53,0xd6,0x01,0x05,0x0e,0x00] + 
+v_mad_i16 v5, v255, s2, s105
+// GFX11: encoding: [0x05,0x00,0x53,0xd6,0xff,0x05,0xa4,0x01]
+
+v_mad_i16 v5, s1, v255, exec_hi
+// GFX11: encoding: [0x05,0x00,0x53,0xd6,0x01,0xfe,0xff,0x01]
+
+v_mad_i16 v5, s105, s105, exec_lo
+// GFX11: encoding: [0x05,0x00,0x53,0xd6,0x69,0xd2,0xf8,0x01]
+
+v_mad_i16 v5, vcc_lo, ttmp15, v3
+// GFX11: encoding: [0x05,0x00,0x53,0xd6,0x6a,0xf6,0x0c,0x04]
+
+v_mad_i16 v5, vcc_hi, 0xfe0b, v255
+// GFX11: encoding: [0x05,0x00,0x53,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00]
+
+v_mad_i16 v5, ttmp15, src_scc, ttmp15
+// GFX11: encoding: [0x05,0x00,0x53,0xd6,0x7b,0xfa,0xed,0x01]
+
+v_mad_i16 v5, m0, 0.5, m0
+// GFX11: encoding: [0x05,0x00,0x53,0xd6,0x7d,0xe0,0xf5,0x01]
+
+v_mad_i16 v5, exec_lo, -1, vcc_hi
+// GFX11: encoding: [0x05,0x00,0x53,0xd6,0x7e,0x82,0xad,0x01]
+
+v_mad_i16 v5, exec_hi, null, vcc_lo op_sel:[1,1,1,1]
+// GFX11: encoding: [0x05,0x78,0x53,0xd6,0x7f,0xf8,0xa8,0x01]
+
+v_mad_i16 v5, null, exec_lo, 0xfe0b op_sel:[0,0,0,0]
+// GFX11: encoding: [0x05,0x00,0x53,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00]
+
+v_mad_i16 v5, -1, exec_hi, src_scc op_sel:[1,0,0,0]
+// GFX11: encoding: [0x05,0x08,0x53,0xd6,0xc1,0xfe,0xf4,0x03]
+
+v_mad_i16 v5, 0.5, m0, 0.5 op_sel:[0,1,0,0]
+// GFX11: encoding: [0x05,0x10,0x53,0xd6,0xf0,0xfa,0xc0,0x03]
+
+v_mad_i16 v5, src_scc, vcc_lo, -1 op_sel:[0,0,1,0]
+// GFX11: encoding: [0x05,0x20,0x53,0xd6,0xfd,0xd4,0x04,0x03]
+
+v_mad_i16 v255, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] clamp
+// GFX11: encoding: [0xff,0xc0,0x53,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00]
+
+v_mad_i32_i16 v5, v1, v2, v3
+// GFX11: encoding: [0x05,0x00,0x5a,0xd6,0x01,0x05,0x0e,0x04]
+
+v_mad_i32_i16 v5, v255, v255, s3
+// GFX11: encoding: [0x05,0x00,0x5a,0xd6,0xff,0xff,0x0f,0x00]
+
+v_mad_i32_i16 v5, s1, s2, v255
+// GFX11: encoding: [0x05,0x00,0x5a,0xd6,0x01,0x04,0xfc,0x07]
+
+v_mad_i32_i16 v5, s105, s105, s105
+// GFX11: encoding: [0x05,0x00,0x5a,0xd6,0x69,0xd2,0xa4,0x01]
+
+v_mad_i32_i16 v5, vcc_lo, ttmp15, vcc_lo
+// GFX11: encoding: [0x05,0x00,0x5a,0xd6,0x6a,0xf6,0xa8,0x01]
+
+v_mad_i32_i16 v5, vcc_hi, 0xfe0b, vcc_hi
+// GFX11: encoding: [0x05,0x00,0x5a,0xd6,0x6b,0xfe,0xad,0x01,0x0b,0xfe,0x00,0x00]
+
+v_mad_i32_i16 v5, ttmp15, src_scc, ttmp15
+// GFX11: encoding: [0x05,0x00,0x5a,0xd6,0x7b,0xfa,0xed,0x01]
+
+v_mad_i32_i16 v5, m0, 0.5, m0
+// GFX11: encoding: [0x05,0x00,0x5a,0xd6,0x7d,0xe0,0xf5,0x01]
+
+v_mad_i32_i16 v5, exec_lo, -1, exec_hi
+// GFX11: encoding: [0x05,0x00,0x5a,0xd6,0x7e,0x82,0xfd,0x01]
+
+v_mad_i32_i16 v5, exec_hi, null, exec_lo
+// GFX11: encoding: [0x05,0x00,0x5a,0xd6,0x7f,0xf8,0xf8,0x01]
+
+v_mad_i32_i16 v5, null, exec_lo, null
+// GFX11: encoding: [0x05,0x00,0x5a,0xd6,0x7c,0xfc,0xf0,0x01]
+
+v_mad_i32_i16 v5, -1, exec_hi, 0xaf123456
+// GFX11: encoding: [0x05,0x00,0x5a,0xd6,0xc1,0xfe,0xfc,0x03,0x56,0x34,0x12,0xaf]
+
+v_mad_i32_i16 v5, 0.5, m0, -1 op_sel:[0,0,0,0]
+// GFX11: encoding: [0x05,0x00,0x5a,0xd6,0xf0,0xfa,0x04,0x03]
+
+v_mad_i32_i16 v5, src_scc, vcc_lo, src_scc op_sel:[1,0,0,0]
+// GFX11: encoding: [0x05,0x08,0x5a,0xd6,0xfd,0xd4,0xf4,0x03]
+
+v_mad_i32_i16 v255, 0xfe0b, vcc_hi, 0.5 op_sel:[0,1,0,0] clamp
+// GFX11: encoding: [0xff,0x90,0x5a,0xd6,0xff,0xd6,0xc0,0x03,0x0b,0xfe,0x00,0x00]
+
+v_mad_i32_i24 v5, v1, v2, s3
+// GFX11: encoding: [0x05,0x00,0x0a,0xd6,0x01,0x05,0x0e,0x00]
+
+v_mad_i32_i24 v5, v255, s2, s105
+// GFX11: encoding: [0x05,0x00,0x0a,0xd6,0xff,0x05,0xa4,0x01]
+
+v_mad_i32_i24 v5, s1, v255, exec_hi
+// GFX11: encoding: [0x05,0x00,0x0a,0xd6,0x01,0xfe,0xff,0x01]
+
+v_mad_i32_i24 v5, s105, s105, exec_lo
+// GFX11: encoding: [0x05,0x00,0x0a,0xd6,0x69,0xd2,0xf8,0x01]
+
+v_mad_i32_i24 v5, vcc_lo, ttmp15, v3
+// GFX11: encoding: [0x05,0x00,0x0a,0xd6,0x6a,0xf6,0x0c,0x04]
+
+v_mad_i32_i24 v5, vcc_hi, 0xaf123456, v255
+// GFX11: encoding: [0x05,0x00,0x0a,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf]
+
+v_mad_i32_i24 v5, ttmp15, src_scc, ttmp15
+// GFX11: encoding: [0x05,0x00,0x0a,0xd6,0x7b,0xfa,0xed,0x01]
+
+v_mad_i32_i24 v5, m0, 0.5, m0
+// GFX11: encoding: [0x05,0x00,0x0a,0xd6,0x7d,0xe0,0xf5,0x01]
+
+v_mad_i32_i24 v5, exec_lo, -1, vcc_hi
+// GFX11: encoding: [0x05,0x00,0x0a,0xd6,0x7e,0x82,0xad,0x01]
+
+v_mad_i32_i24 v5, exec_hi, null, vcc_lo
+// GFX11: encoding: [0x05,0x00,0x0a,0xd6,0x7f,0xf8,0xa8,0x01]
+
+v_mad_i32_i24 v5, null, exec_lo, 0xaf123456
+// GFX11: encoding: [0x05,0x00,0x0a,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf]
+
+v_mad_i32_i24 v5, -1, exec_hi, src_scc
+// GFX11: encoding: [0x05,0x00,0x0a,0xd6,0xc1,0xfe,0xf4,0x03]
+
+v_mad_i32_i24 v5, 0.5, m0, 0.5
+// GFX11: encoding: [0x05,0x00,0x0a,0xd6,0xf0,0xfa,0xc0,0x03]
+
+v_mad_i32_i24 v5, src_scc, vcc_lo, -1
+// GFX11: encoding: [0x05,0x00,0x0a,0xd6,0xfd,0xd4,0x04,0x03]
+
+v_mad_i32_i24 v255, 0xaf123456, vcc_hi, null clamp
+// GFX11: encoding: [0xff,0x80,0x0a,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf]
+
+v_mad_i64_i32 v[5:6], s6, s105, s105, s[6:7]
+// W32: encoding: [0x05,0x06,0xff,0xd6,0x69,0xd2,0x18,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_mad_i64_i32 v[5:6], s6, ttmp15, ttmp15, s[104:105]
+// W32: encoding: [0x05,0x06,0xff,0xd6,0x7b,0xf6,0xa0,0x01]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_mad_i64_i32 v[5:6], s6, m0, 0.5, ttmp[14:15]
+// W32: encoding: [0x05,0x06,0xff,0xd6,0x7d,0xe0,0xe9,0x01]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_mad_i64_i32 v[5:6], s6, exec_lo, -1, exec
+// W32: encoding: [0x05,0x06,0xff,0xd6,0x7e,0x82,0xf9,0x01]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_mad_i64_i32 v[5:6], s6, exec_hi, null, vcc
+// W32: encoding: [0x05,0x06,0xff,0xd6,0x7f,0xf8,0xa8,0x01]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_mad_i64_i32 v[5:6], s105, null, exec_lo, null
+// W32: encoding: [0x05,0x69,0xff,0xd6,0x7c,0xfc,0xf0,0x01]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_mad_i64_i32 v[5:6], vcc_lo, -1, exec_hi, -1
+// W32: encoding: [0x05,0x6a,0xff,0xd6,0xc1,0xfe,0x04,0x03]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_mad_i64_i32 v[5:6], vcc_hi, 0.5, m0, 0xaf123456
+// W32: encoding: [0x05,0x6b,0xff,0xd6,0xf0,0xfa,0xfc,0x03,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_mad_i64_i32 v[5:6], ttmp15, src_scc, vcc_lo, src_scc
+// W32: encoding: [0x05,0x7b,0xff,0xd6,0xfd,0xd4,0xf4,0x03]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_mad_i64_i32 v[5:6], s[12:13], s105, s105, s[6:7]
+// W64: encoding: [0x05,0x0c,0xff,0xd6,0x69,0xd2,0x18,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_mad_i64_i32 v[5:6], s[12:13], ttmp15, ttmp15, s[104:105]
+// W64: encoding: [0x05,0x0c,0xff,0xd6,0x7b,0xf6,0xa0,0x01]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_mad_i64_i32 v[5:6], s[12:13], m0, 0.5, ttmp[14:15]
+// W64: encoding: [0x05,0x0c,0xff,0xd6,0x7d,0xe0,0xe9,0x01]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_mad_i64_i32 v[5:6], s[12:13], exec_lo, -1, exec
+// W64: encoding: [0x05,0x0c,0xff,0xd6,0x7e,0x82,0xf9,0x01]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_mad_i64_i32 v[5:6], s[12:13], exec_hi, null, vcc
+// W64: encoding: [0x05,0x0c,0xff,0xd6,0x7f,0xf8,0xa8,0x01]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_mad_i64_i32 v[5:6], s[12:13], null, exec_lo, null
+// W64: encoding: [0x05,0x0c,0xff,0xd6,0x7c,0xfc,0xf0,0x01]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_mad_i64_i32 v[5:6], s[104:105], -1, exec_hi, -1
+// W64: encoding: [0x05,0x68,0xff,0xd6,0xc1,0xfe,0x04,0x03]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_mad_i64_i32 v[5:6], vcc, 0.5, m0, 0xaf123456
+// W64: encoding: [0x05,0x6a,0xff,0xd6,0xf0,0xfa,0xfc,0x03,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_mad_i64_i32 v[5:6], ttmp[14:15], src_scc, vcc_lo, src_scc
+// W64: encoding: [0x05,0x7a,0xff,0xd6,0xfd,0xd4,0xf4,0x03]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_mad_i64_i32 v[254:255], null, 0xaf123456, vcc_hi, 0.5 clamp
+// GFX11: encoding: [0xfe,0xfc,0xff,0xd6,0xff,0xd6,0xc0,0x03,0x56,0x34,0x12,0xaf]
+
+v_mad_u16 v5, v1, v2, s3
+// GFX11: encoding: [0x05,0x00,0x41,0xd6,0x01,0x05,0x0e,0x00]
+
+v_mad_u16 v5, v255, s2, s105
+// GFX11: encoding: [0x05,0x00,0x41,0xd6,0xff,0x05,0xa4,0x01]
+
+v_mad_u16 v5, s1, v255, exec_hi
+// GFX11: encoding: [0x05,0x00,0x41,0xd6,0x01,0xfe,0xff,0x01]
+
+v_mad_u16 v5, s105, s105, exec_lo
+// GFX11: encoding: [0x05,0x00,0x41,0xd6,0x69,0xd2,0xf8,0x01]
+
+v_mad_u16 v5, vcc_lo, ttmp15, v3
+// GFX11: encoding: [0x05,0x00,0x41,0xd6,0x6a,0xf6,0x0c,0x04]
+
+v_mad_u16 v5, vcc_hi, 0xfe0b, v255
+// GFX11: encoding: [0x05,0x00,0x41,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00]
+
+v_mad_u16 v5, ttmp15, src_scc, ttmp15
+// GFX11: encoding: [0x05,0x00,0x41,0xd6,0x7b,0xfa,0xed,0x01]
+
+v_mad_u16 v5, m0, 0.5, m0
+// GFX11: encoding: [0x05,0x00,0x41,0xd6,0x7d,0xe0,0xf5,0x01]
+
+v_mad_u16 v5, exec_lo, -1, vcc_hi
+// GFX11: encoding: [0x05,0x00,0x41,0xd6,0x7e,0x82,0xad,0x01]
+
+v_mad_u16 v5, exec_hi, null, vcc_lo op_sel:[1,1,1,1]
+// GFX11: encoding: [0x05,0x78,0x41,0xd6,0x7f,0xf8,0xa8,0x01]
+
+v_mad_u16 v5, null, exec_lo, 0xfe0b op_sel:[0,0,0,0]
+// GFX11: encoding: [0x05,0x00,0x41,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00]
+
+v_mad_u16 v5, -1, exec_hi, src_scc op_sel:[1,0,0,0]
+// GFX11: encoding: [0x05,0x08,0x41,0xd6,0xc1,0xfe,0xf4,0x03]
+
+v_mad_u16 v5, 0.5, m0, 0.5 op_sel:[0,1,0,0]
+// GFX11: encoding: [0x05,0x10,0x41,0xd6,0xf0,0xfa,0xc0,0x03]
+
+v_mad_u16 v5, src_scc, vcc_lo, -1 op_sel:[0,0,1,0]
+// GFX11: encoding: [0x05,0x20,0x41,0xd6,0xfd,0xd4,0x04,0x03]
+
+v_mad_u16 v255, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] clamp
+// GFX11: encoding: [0xff,0xc0,0x41,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00]
+
+v_mad_u32_u16 v5, v1, v2, v3
+// GFX11: encoding: [0x05,0x00,0x59,0xd6,0x01,0x05,0x0e,0x04]
+
+v_mad_u32_u16 v5, v255, v255, s3
+// GFX11: encoding: [0x05,0x00,0x59,0xd6,0xff,0xff,0x0f,0x00]
+
+v_mad_u32_u16 v5, s1, s2, v255
+// GFX11: encoding: [0x05,0x00,0x59,0xd6,0x01,0x04,0xfc,0x07]
+
+v_mad_u32_u16 v5, s105, s105, s105
+// GFX11: encoding: [0x05,0x00,0x59,0xd6,0x69,0xd2,0xa4,0x01]
+
+v_mad_u32_u16 v5, vcc_lo, ttmp15, vcc_lo
+// GFX11: encoding: [0x05,0x00,0x59,0xd6,0x6a,0xf6,0xa8,0x01]
+
+v_mad_u32_u16 v5, vcc_hi, 0xfe0b, vcc_hi
+// GFX11: encoding: [0x05,0x00,0x59,0xd6,0x6b,0xfe,0xad,0x01,0x0b,0xfe,0x00,0x00]
+
+v_mad_u32_u16 v5, ttmp15, src_scc, ttmp15
+// GFX11: encoding: [0x05,0x00,0x59,0xd6,0x7b,0xfa,0xed,0x01]
+
+v_mad_u32_u16 v5, m0, 0.5, m0
+// GFX11: encoding: [0x05,0x00,0x59,0xd6,0x7d,0xe0,0xf5,0x01]
+
+v_mad_u32_u16 v5, exec_lo, -1, exec_hi
+// GFX11: encoding: [0x05,0x00,0x59,0xd6,0x7e,0x82,0xfd,0x01]
+
+v_mad_u32_u16 v5, exec_hi, null, exec_lo
+// GFX11: encoding: [0x05,0x00,0x59,0xd6,0x7f,0xf8,0xf8,0x01]
+
+v_mad_u32_u16 v5, null, exec_lo, null
+// GFX11: encoding: [0x05,0x00,0x59,0xd6,0x7c,0xfc,0xf0,0x01]
+
+v_mad_u32_u16 v5, -1, exec_hi, 0xaf123456
+// GFX11: encoding: [0x05,0x00,0x59,0xd6,0xc1,0xfe,0xfc,0x03,0x56,0x34,0x12,0xaf]
+
+v_mad_u32_u16 v5, 0.5, m0, -1 op_sel:[0,0,0,0]
+// GFX11: encoding: [0x05,0x00,0x59,0xd6,0xf0,0xfa,0x04,0x03]
+
+v_mad_u32_u16 v5, src_scc, vcc_lo, src_scc op_sel:[1,0,0,0]
+// GFX11: encoding: [0x05,0x08,0x59,0xd6,0xfd,0xd4,0xf4,0x03]
+
+v_mad_u32_u16 v255, 0xfe0b, vcc_hi, 0.5 op_sel:[0,1,0,0] clamp
+// GFX11: encoding: [0xff,0x90,0x59,0xd6,0xff,0xd6,0xc0,0x03,0x0b,0xfe,0x00,0x00]
+
+v_mad_u32_u24 v5, v1, v2, s3
+// GFX11: encoding: [0x05,0x00,0x0b,0xd6,0x01,0x05,0x0e,0x00]
+
+v_mad_u32_u24 v5, v255, s2, s105
+// GFX11: encoding: [0x05,0x00,0x0b,0xd6,0xff,0x05,0xa4,0x01]
+
+v_mad_u32_u24 v5, s1, v255, exec_hi
+// GFX11: encoding: [0x05,0x00,0x0b,0xd6,0x01,0xfe,0xff,0x01]
+
+v_mad_u32_u24 v5, s105, s105, exec_lo
+// GFX11: encoding: [0x05,0x00,0x0b,0xd6,0x69,0xd2,0xf8,0x01]
+
+v_mad_u32_u24 v5, vcc_lo, ttmp15, v3
+// GFX11: encoding: [0x05,0x00,0x0b,0xd6,0x6a,0xf6,0x0c,0x04]
+
+v_mad_u32_u24 v5, vcc_hi, 0xaf123456, v255
+// GFX11: encoding: [0x05,0x00,0x0b,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf]
+
+v_mad_u32_u24 v5, ttmp15, src_scc, ttmp15
+// GFX11: encoding: [0x05,0x00,0x0b,0xd6,0x7b,0xfa,0xed,0x01]
+
+v_mad_u32_u24 v5, m0, 0.5, m0
+// GFX11: encoding: [0x05,0x00,0x0b,0xd6,0x7d,0xe0,0xf5,0x01]
+
+v_mad_u32_u24 v5, exec_lo, -1, vcc_hi
+// GFX11: encoding: [0x05,0x00,0x0b,0xd6,0x7e,0x82,0xad,0x01]
+
+v_mad_u32_u24 v5, exec_hi, null, vcc_lo
+// GFX11: encoding: [0x05,0x00,0x0b,0xd6,0x7f,0xf8,0xa8,0x01]
+
+v_mad_u32_u24 v5, null, exec_lo, 0xaf123456
+// GFX11: encoding: [0x05,0x00,0x0b,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf]
+
+v_mad_u32_u24 v5, -1, exec_hi, src_scc
+// GFX11: encoding: [0x05,0x00,0x0b,0xd6,0xc1,0xfe,0xf4,0x03]
+
+v_mad_u32_u24 v5, 0.5, m0, 0.5
+// GFX11: encoding: [0x05,0x00,0x0b,0xd6,0xf0,0xfa,0xc0,0x03]
+
+v_mad_u32_u24 v5, src_scc, vcc_lo, -1
+// GFX11: encoding: [0x05,0x00,0x0b,0xd6,0xfd,0xd4,0x04,0x03]
+
+v_mad_u32_u24 v255, 0xaf123456, vcc_hi, null clamp
+// GFX11: encoding: [0xff,0x80,0x0b,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf]
+
+v_mad_u64_u32 v[5:6], s6, s105, s105, s[6:7]
+// W32: encoding: [0x05,0x06,0xfe,0xd6,0x69,0xd2,0x18,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_mad_u64_u32 v[5:6], s6, ttmp15, ttmp15, s[104:105]
+// W32: encoding: [0x05,0x06,0xfe,0xd6,0x7b,0xf6,0xa0,0x01]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_mad_u64_u32 v[5:6], s6, m0, 0.5, ttmp[14:15]
+// W32: encoding: [0x05,0x06,0xfe,0xd6,0x7d,0xe0,0xe9,0x01]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_mad_u64_u32 v[5:6], s6, exec_lo, -1, exec
+// W32: encoding: [0x05,0x06,0xfe,0xd6,0x7e,0x82,0xf9,0x01]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_mad_u64_u32 v[5:6], s6, exec_hi, null, vcc
+// W32: encoding: [0x05,0x06,0xfe,0xd6,0x7f,0xf8,0xa8,0x01]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_mad_u64_u32 v[5:6], s105, null, exec_lo, null
+// W32: encoding: [0x05,0x69,0xfe,0xd6,0x7c,0xfc,0xf0,0x01]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_mad_u64_u32 v[5:6], vcc_lo, -1, exec_hi, -1
+// W32: encoding: [0x05,0x6a,0xfe,0xd6,0xc1,0xfe,0x04,0x03]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_mad_u64_u32 v[5:6], vcc_hi, 0.5, m0, 0xaf123456
+// W32: encoding: [0x05,0x6b,0xfe,0xd6,0xf0,0xfa,0xfc,0x03,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_mad_u64_u32 v[5:6], ttmp15, src_scc, vcc_lo, src_scc
+// W32: encoding: [0x05,0x7b,0xfe,0xd6,0xfd,0xd4,0xf4,0x03]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_mad_u64_u32 v[5:6], s[12:13], s105, s105, s[6:7]
+// W64: encoding: [0x05,0x0c,0xfe,0xd6,0x69,0xd2,0x18,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_mad_u64_u32 v[5:6], s[12:13], ttmp15, ttmp15, s[104:105]
+// W64: encoding: [0x05,0x0c,0xfe,0xd6,0x7b,0xf6,0xa0,0x01]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_mad_u64_u32 v[5:6], s[12:13], m0, 0.5, ttmp[14:15]
+// W64: encoding: [0x05,0x0c,0xfe,0xd6,0x7d,0xe0,0xe9,0x01]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_mad_u64_u32 v[5:6], s[12:13], exec_lo, -1, exec
+// W64: encoding: [0x05,0x0c,0xfe,0xd6,0x7e,0x82,0xf9,0x01]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_mad_u64_u32 v[5:6], s[12:13], exec_hi, null, vcc
+// W64: encoding: [0x05,0x0c,0xfe,0xd6,0x7f,0xf8,0xa8,0x01]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_mad_u64_u32 v[5:6], s[12:13], null, exec_lo, null
+// W64: encoding: [0x05,0x0c,0xfe,0xd6,0x7c,0xfc,0xf0,0x01]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_mad_u64_u32 v[5:6], s[104:105], -1, exec_hi, -1
+// W64: encoding: [0x05,0x68,0xfe,0xd6,0xc1,0xfe,0x04,0x03]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_mad_u64_u32 v[5:6], vcc, 0.5, m0, 0xaf123456
+// W64: encoding: [0x05,0x6a,0xfe,0xd6,0xf0,0xfa,0xfc,0x03,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_mad_u64_u32 v[5:6], ttmp[14:15], src_scc, vcc_lo, src_scc
+// W64: encoding: [0x05,0x7a,0xfe,0xd6,0xfd,0xd4,0xf4,0x03]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_mad_u64_u32 v[254:255], null, 0xaf123456, vcc_hi, 0.5 clamp
+// GFX11: encoding: [0xfe,0xfc,0xfe,0xd6,0xff,0xd6,0xc0,0x03,0x56,0x34,0x12,0xaf]
+
+v_max3_f16 v5, v1, v2, s3
+// GFX11: encoding: [0x05,0x00,0x4c,0xd6,0x01,0x05,0x0e,0x00]
+
+v_max3_f16 v5, v255, s2, s105
+// GFX11: encoding: [0x05,0x00,0x4c,0xd6,0xff,0x05,0xa4,0x01]
+
+v_max3_f16 v5, s1, v255, exec_hi
+// GFX11: encoding: [0x05,0x00,0x4c,0xd6,0x01,0xfe,0xff,0x01]
+
+v_max3_f16 v5, s105, s105, exec_lo
+// GFX11: encoding: [0x05,0x00,0x4c,0xd6,0x69,0xd2,0xf8,0x01]
+
+v_max3_f16 v5, vcc_lo, ttmp15, v3
+// GFX11: encoding: [0x05,0x00,0x4c,0xd6,0x6a,0xf6,0x0c,0x04]
+
+v_max3_f16 v5, vcc_hi, 0xfe0b, v255
+// GFX11: encoding: [0x05,0x00,0x4c,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00]
+
+v_max3_f16 v5, -|ttmp15|, -|src_scc|, -|ttmp15|
+// GFX11: encoding: [0x05,0x07,0x4c,0xd6,0x7b,0xfa,0xed,0xe1]
+
+v_max3_f16 v5, m0, 0.5, m0
+// GFX11: encoding: [0x05,0x00,0x4c,0xd6,0x7d,0xe0,0xf5,0x01]
+
+v_max3_f16 v5, |exec_lo|, -1, vcc_hi
+// GFX11: encoding: [0x05,0x01,0x4c,0xd6,0x7e,0x82,0xad,0x01]
+
+v_max3_f16 v5, -|exec_hi|, null, -|vcc_lo| op_sel:[1,1,1,1]
+// GFX11: encoding: [0x05,0x7d,0x4c,0xd6,0x7f,0xf8,0xa8,0xa1]
+
+v_max3_f16 v5, null, exec_lo, -|0xfe0b| op_sel:[0,0,0,0]
+// GFX11: encoding: [0x05,0x04,0x4c,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00]
+
+v_max3_f16 v5, -1, -|exec_hi|, -|src_scc| op_sel:[1,0,0,0]
+// GFX11: encoding: [0x05,0x0e,0x4c,0xd6,0xc1,0xfe,0xf4,0xc3]
+
+v_max3_f16 v5, 0.5, -m0, 0.5 op_sel:[0,1,0,0]
+// GFX11: encoding: [0x05,0x10,0x4c,0xd6,0xf0,0xfa,0xc0,0x43]
+
+v_max3_f16 v5, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0]
+// GFX11: encoding: [0x05,0x22,0x4c,0xd6,0xfd,0xd4,0x04,0x23]
+
+v_max3_f16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp
+// GFX11: encoding: [0xff,0xc3,0x4c,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00]
+
+v_max3_f16 v5, v255, s2, s105 mul:2
+// GFX11: encoding: [0x05,0x00,0x4c,0xd6,0xff,0x05,0xa4,0x09]
+
+v_max3_f32 v5, v1, v2, s3
+// GFX11: encoding: [0x05,0x00,0x1c,0xd6,0x01,0x05,0x0e,0x00]
+
+v_max3_f32 v5, v255, s2, s105
+// GFX11: encoding: [0x05,0x00,0x1c,0xd6,0xff,0x05,0xa4,0x01]
+
+v_max3_f32 v5, s1, v255, exec_hi
+// GFX11: encoding: [0x05,0x00,0x1c,0xd6,0x01,0xfe,0xff,0x01]
+
+v_max3_f32 v5, s105, s105, exec_lo
+// GFX11: encoding: [0x05,0x00,0x1c,0xd6,0x69,0xd2,0xf8,0x01]
+
+v_max3_f32 v5, vcc_lo, ttmp15, v3
+// GFX11: encoding: [0x05,0x00,0x1c,0xd6,0x6a,0xf6,0x0c,0x04]
+
+v_max3_f32 v5, vcc_hi, 0xaf123456, v255
+// GFX11: encoding: [0x05,0x00,0x1c,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf]
+
+v_max3_f32 v5, -|ttmp15|, -|src_scc|, -|ttmp15|
+// GFX11: encoding: [0x05,0x07,0x1c,0xd6,0x7b,0xfa,0xed,0xe1]
+
+v_max3_f32 v5, m0, 0.5, m0
+// GFX11: encoding: [0x05,0x00,0x1c,0xd6,0x7d,0xe0,0xf5,0x01]
+
+v_max3_f32 v5, |exec_lo|, -1, vcc_hi
+// GFX11: encoding: [0x05,0x01,0x1c,0xd6,0x7e,0x82,0xad,0x01]
+
+v_max3_f32 v5, -|exec_hi|, null, -|vcc_lo|
+// GFX11: encoding: [0x05,0x05,0x1c,0xd6,0x7f,0xf8,0xa8,0xa1]
+
+v_max3_f32 v5, null, exec_lo, -|0xaf123456|
+// GFX11: encoding: [0x05,0x04,0x1c,0xd6,0x7c,0xfc,0xfc,0x83,0x56,0x34,0x12,0xaf]
+
+v_max3_f32 v5, -1, -|exec_hi|, -|src_scc|
+// GFX11: encoding: [0x05,0x06,0x1c,0xd6,0xc1,0xfe,0xf4,0xc3]
+
+v_max3_f32 v5, 0.5, -m0, 0.5 mul:2
+// GFX11: encoding: [0x05,0x00,0x1c,0xd6,0xf0,0xfa,0xc0,0x4b]
+
+v_max3_f32 v5, -src_scc, |vcc_lo|, -1 mul:4
+// GFX11: encoding: [0x05,0x02,0x1c,0xd6,0xfd,0xd4,0x04,0x33]
+
+v_max3_f32 v255, -|0xaf123456|, -|vcc_hi|, null clamp div:2
+// GFX11: encoding: [0xff,0x83,0x1c,0xd6,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf]
+
+v_max3_i16 v5, v1, v2, s3
+// GFX11: encoding: [0x05,0x00,0x4d,0xd6,0x01,0x05,0x0e,0x00]
+
+v_max3_i16 v5, v255, s2, s105
+// GFX11: encoding: [0x05,0x00,0x4d,0xd6,0xff,0x05,0xa4,0x01]
+
+v_max3_i16 v5, s1, v255, exec_hi
+// GFX11: encoding: [0x05,0x00,0x4d,0xd6,0x01,0xfe,0xff,0x01]
+
+v_max3_i16 v5, s105, s105, exec_lo
+// GFX11: encoding: [0x05,0x00,0x4d,0xd6,0x69,0xd2,0xf8,0x01]
+
+v_max3_i16 v5, vcc_lo, ttmp15, v3
+// GFX11: encoding: [0x05,0x00,0x4d,0xd6,0x6a,0xf6,0x0c,0x04]
+
+v_max3_i16 v5, vcc_hi, 0xfe0b, v255
+// GFX11: encoding: [0x05,0x00,0x4d,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00]
+
+v_max3_i16 v5, ttmp15, src_scc, ttmp15
+// GFX11: encoding: [0x05,0x00,0x4d,0xd6,0x7b,0xfa,0xed,0x01]
+
+v_max3_i16 v5, m0, 0.5, m0
+// GFX11: encoding: [0x05,0x00,0x4d,0xd6,0x7d,0xe0,0xf5,0x01]
+
+v_max3_i16 v5, exec_lo, -1, vcc_hi
+// GFX11: encoding: [0x05,0x00,0x4d,0xd6,0x7e,0x82,0xad,0x01]
+
+v_max3_i16 v5, exec_hi, null, vcc_lo op_sel:[1,1,1,1]
+// GFX11: encoding: [0x05,0x78,0x4d,0xd6,0x7f,0xf8,0xa8,0x01]
+
+v_max3_i16 v5, null, exec_lo, 0xfe0b op_sel:[0,0,0,0]
+// GFX11: encoding: [0x05,0x00,0x4d,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00]
+
+v_max3_i16 v5, -1, exec_hi, src_scc op_sel:[1,0,0,0]
+// GFX11: encoding: [0x05,0x08,0x4d,0xd6,0xc1,0xfe,0xf4,0x03]
+
+v_max3_i16 v5, 0.5, m0, 0.5 op_sel:[0,1,0,0]
+// GFX11: encoding: [0x05,0x10,0x4d,0xd6,0xf0,0xfa,0xc0,0x03]
+
+v_max3_i16 v5, src_scc, vcc_lo, -1 op_sel:[0,0,1,0]
+// GFX11: encoding: [0x05,0x20,0x4d,0xd6,0xfd,0xd4,0x04,0x03]
+
+v_max3_i16 v255, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1]
+// GFX11: encoding: [0xff,0x40,0x4d,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00]
+
+v_max3_i32 v5, v1, v2, s3
+// GFX11: encoding: [0x05,0x00,0x1d,0xd6,0x01,0x05,0x0e,0x00]
+
+v_max3_i32 v5, v255, s2, s105
+// GFX11: encoding: [0x05,0x00,0x1d,0xd6,0xff,0x05,0xa4,0x01]
+
+v_max3_i32 v5, s1, v255, exec_hi
+// GFX11: encoding: [0x05,0x00,0x1d,0xd6,0x01,0xfe,0xff,0x01]
+
+v_max3_i32 v5, s105, s105, exec_lo
+// GFX11: encoding: [0x05,0x00,0x1d,0xd6,0x69,0xd2,0xf8,0x01]
+
+v_max3_i32 v5, vcc_lo, ttmp15, v3
+// GFX11: encoding: [0x05,0x00,0x1d,0xd6,0x6a,0xf6,0x0c,0x04]
+
+v_max3_i32 v5, vcc_hi, 0xaf123456, v255
+// GFX11: encoding: [0x05,0x00,0x1d,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf]
+
+v_max3_i32 v5, ttmp15, src_scc, ttmp15
+// GFX11: encoding: [0x05,0x00,0x1d,0xd6,0x7b,0xfa,0xed,0x01]
+
+v_max3_i32 v5, m0, 0.5, m0
+// GFX11: encoding: [0x05,0x00,0x1d,0xd6,0x7d,0xe0,0xf5,0x01]
+
+v_max3_i32 v5, exec_lo, -1, vcc_hi
+// GFX11: encoding: [0x05,0x00,0x1d,0xd6,0x7e,0x82,0xad,0x01]
+
+v_max3_i32 v5, exec_hi, null, vcc_lo
+// GFX11: encoding: [0x05,0x00,0x1d,0xd6,0x7f,0xf8,0xa8,0x01]
+
+v_max3_i32 v5, null, exec_lo, 0xaf123456
+// GFX11: encoding: [0x05,0x00,0x1d,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf]
+
+v_max3_i32 v5, -1, exec_hi, src_scc
+// GFX11: encoding: [0x05,0x00,0x1d,0xd6,0xc1,0xfe,0xf4,0x03]
+
+v_max3_i32 v5, 0.5, m0, 0.5
+// GFX11: encoding: [0x05,0x00,0x1d,0xd6,0xf0,0xfa,0xc0,0x03]
+
+v_max3_i32 v5, src_scc, vcc_lo, -1
+// GFX11: encoding: [0x05,0x00,0x1d,0xd6,0xfd,0xd4,0x04,0x03]
+
+v_max3_i32 v255, 0xaf123456, vcc_hi, null
+// GFX11: encoding: [0xff,0x00,0x1d,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf]
+
+v_max3_u16 v5, v1, v2, s3
+// GFX11: encoding: [0x05,0x00,0x4e,0xd6,0x01,0x05,0x0e,0x00]
+
+v_max3_u16 v5, v255, s2, s105
+// GFX11: encoding: [0x05,0x00,0x4e,0xd6,0xff,0x05,0xa4,0x01]
+
+v_max3_u16 v5, s1, v255, exec_hi
+// GFX11: encoding: [0x05,0x00,0x4e,0xd6,0x01,0xfe,0xff,0x01]
+
+v_max3_u16 v5, s105, s105, exec_lo
+// GFX11: encoding: [0x05,0x00,0x4e,0xd6,0x69,0xd2,0xf8,0x01]
+
+v_max3_u16 v5, vcc_lo, ttmp15, v3
+// GFX11: encoding: [0x05,0x00,0x4e,0xd6,0x6a,0xf6,0x0c,0x04]
+
+v_max3_u16 v5, vcc_hi, 0xfe0b, v255
+// GFX11: encoding: [0x05,0x00,0x4e,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00]
+
+v_max3_u16 v5, ttmp15, src_scc, ttmp15
+// GFX11: encoding: [0x05,0x00,0x4e,0xd6,0x7b,0xfa,0xed,0x01]
+
+v_max3_u16 v5, m0, 0.5, m0
+// GFX11: encoding: [0x05,0x00,0x4e,0xd6,0x7d,0xe0,0xf5,0x01]
+
+v_max3_u16 v5, exec_lo, -1, vcc_hi
+// GFX11: encoding: [0x05,0x00,0x4e,0xd6,0x7e,0x82,0xad,0x01]
+
+v_max3_u16 v5, exec_hi, null, vcc_lo op_sel:[1,1,1,1]
+// GFX11: encoding: [0x05,0x78,0x4e,0xd6,0x7f,0xf8,0xa8,0x01]
+
+v_max3_u16 v5, null, exec_lo, 0xfe0b op_sel:[0,0,0,0]
+// GFX11: encoding: [0x05,0x00,0x4e,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00]
+
+v_max3_u16 v5, -1, exec_hi, src_scc op_sel:[1,0,0,0]
+// GFX11: encoding: [0x05,0x08,0x4e,0xd6,0xc1,0xfe,0xf4,0x03]
+
+v_max3_u16 v5, 0.5, m0, 0.5 op_sel:[0,1,0,0]
+// GFX11: encoding: [0x05,0x10,0x4e,0xd6,0xf0,0xfa,0xc0,0x03]
+
+v_max3_u16 v5, src_scc, vcc_lo, -1 op_sel:[0,0,1,0]
+// GFX11: encoding: [0x05,0x20,0x4e,0xd6,0xfd,0xd4,0x04,0x03]
+
+v_max3_u16 v255, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1]
+// GFX11: encoding: [0xff,0x40,0x4e,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00]
+
+v_max3_u32 v5, v1, v2, s3
+// GFX11: encoding: [0x05,0x00,0x1e,0xd6,0x01,0x05,0x0e,0x00]
+
+v_max3_u32 v5, v255, s2, s105
+// GFX11: encoding: [0x05,0x00,0x1e,0xd6,0xff,0x05,0xa4,0x01]
+
+v_max3_u32 v5, s1, v255, exec_hi
+// GFX11: encoding: [0x05,0x00,0x1e,0xd6,0x01,0xfe,0xff,0x01]
+
+v_max3_u32 v5, s105, s105, exec_lo
+// GFX11: encoding: [0x05,0x00,0x1e,0xd6,0x69,0xd2,0xf8,0x01]
+
+v_max3_u32 v5, vcc_lo, ttmp15, v3
+// GFX11: encoding: [0x05,0x00,0x1e,0xd6,0x6a,0xf6,0x0c,0x04]
+
+v_max3_u32 v5, vcc_hi, 0xaf123456, v255
+// GFX11: encoding: [0x05,0x00,0x1e,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf]
+
+v_max3_u32 v5, ttmp15, src_scc, ttmp15
+// GFX11: encoding: [0x05,0x00,0x1e,0xd6,0x7b,0xfa,0xed,0x01]
+
+v_max3_u32 v5, m0, 0.5, m0
+// GFX11: encoding: [0x05,0x00,0x1e,0xd6,0x7d,0xe0,0xf5,0x01]
+
+v_max3_u32 v5, exec_lo, -1, vcc_hi
+// GFX11: encoding: [0x05,0x00,0x1e,0xd6,0x7e,0x82,0xad,0x01]
+
+v_max3_u32 v5, exec_hi, null, vcc_lo
+// GFX11: encoding: [0x05,0x00,0x1e,0xd6,0x7f,0xf8,0xa8,0x01]
+
+v_max3_u32 v5, null, exec_lo, 0xaf123456
+// GFX11: encoding: [0x05,0x00,0x1e,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf]
+
+v_max3_u32 v5, -1, exec_hi, src_scc
+// GFX11: encoding: [0x05,0x00,0x1e,0xd6,0xc1,0xfe,0xf4,0x03]
+
+v_max3_u32 v5, 0.5, m0, 0.5
+// GFX11: encoding: [0x05,0x00,0x1e,0xd6,0xf0,0xfa,0xc0,0x03]
+
+v_max3_u32 v5, src_scc, vcc_lo, -1
+// GFX11: encoding: [0x05,0x00,0x1e,0xd6,0xfd,0xd4,0x04,0x03]
+
+v_max3_u32 v255, 0xaf123456, vcc_hi, null
+// GFX11: encoding: [0xff,0x00,0x1e,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf]
+
+v_max_f64 v[5:6], v[1:2], v[2:3]
+// GFX11: encoding: [0x05,0x00,0x2a,0xd7,0x01,0x05,0x02,0x00]
+
+v_max_f64 v[5:6], v[254:255], v[254:255]
+// GFX11: encoding: [0x05,0x00,0x2a,0xd7,0xfe,0xfd,0x03,0x00]
+
+v_max_f64 v[5:6], s[2:3], s[4:5]
+// GFX11: encoding: [0x05,0x00,0x2a,0xd7,0x02,0x08,0x00,0x00]
+
+v_max_f64 v[5:6], s[104:105], s[104:105]
+// GFX11: encoding: [0x05,0x00,0x2a,0xd7,0x68,0xd0,0x00,0x00]
+
+v_max_f64 v[5:6], vcc, ttmp[14:15]
+// GFX11: encoding: [0x05,0x00,0x2a,0xd7,0x6a,0xf4,0x00,0x00]
+
+v_max_f64 v[5:6], ttmp[14:15], 0xaf123456
+// GFX11: encoding: [0x05,0x00,0x2a,0xd7,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+
+v_max_f64 v[5:6], -|exec|, src_scc
+// GFX11: encoding: [0x05,0x01,0x2a,0xd7,0x7e,0xfa,0x01,0x20]
+
+v_max_f64 v[5:6], null, 0.5
+// GFX11: encoding: [0x05,0x00,0x2a,0xd7,0x7c,0xe0,0x01,0x00]
+
+v_max_f64 v[5:6], -1, -1
+// GFX11: encoding: [0x05,0x00,0x2a,0xd7,0xc1,0x82,0x01,0x00]
+
+v_max_f64 v[5:6], 0.5, null mul:2
+// GFX11: encoding: [0x05,0x00,0x2a,0xd7,0xf0,0xf8,0x00,0x08]
+
+v_max_f64 v[5:6], -|src_scc|, -|exec| mul:4
+// GFX11: encoding: [0x05,0x03,0x2a,0xd7,0xfd,0xfc,0x00,0x70]
+
+v_max_f64 v[254:255], 0xaf123456, -|vcc| clamp div:2
+// GFX11: encoding: [0xfe,0x82,0x2a,0xd7,0xff,0xd4,0x00,0x58,0x56,0x34,0x12,0xaf]
+
+v_max_i16 v5, v1, v2
+// GFX11: encoding: [0x05,0x00,0x0a,0xd7,0x01,0x05,0x02,0x00]
+
+v_max_i16 v5, v255, v255
+// GFX11: encoding: [0x05,0x00,0x0a,0xd7,0xff,0xff,0x03,0x00]
+
+v_max_i16 v5, s1, s2
+// GFX11: encoding: [0x05,0x00,0x0a,0xd7,0x01,0x04,0x00,0x00]
+
+v_max_i16 v5, s105, s105
+// GFX11: encoding: [0x05,0x00,0x0a,0xd7,0x69,0xd2,0x00,0x00]
+
+v_max_i16 v5, vcc_lo, ttmp15
+// GFX11: encoding: [0x05,0x00,0x0a,0xd7,0x6a,0xf6,0x00,0x00]
+
+v_max_i16 v5, vcc_hi, 0xfe0b
+// GFX11: encoding: [0x05,0x00,0x0a,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+
+v_max_i16 v5, ttmp15, src_scc
+// GFX11: encoding: [0x05,0x00,0x0a,0xd7,0x7b,0xfa,0x01,0x00]
+
+v_max_i16 v5, m0, 0.5
+// GFX11: encoding: [0x05,0x00,0x0a,0xd7,0x7d,0xe0,0x01,0x00]
+
+v_max_i16 v5, exec_lo, -1
+// GFX11: encoding: [0x05,0x00,0x0a,0xd7,0x7e,0x82,0x01,0x00]
+
+v_max_i16 v5, exec_hi, null
+// GFX11: encoding: [0x05,0x00,0x0a,0xd7,0x7f,0xf8,0x00,0x00]
+
+v_max_i16 v5, null, exec_lo
+// GFX11: encoding: [0x05,0x00,0x0a,0xd7,0x7c,0xfc,0x00,0x00]
+
+v_max_i16 v5, -1, exec_hi
+// GFX11: encoding: [0x05,0x00,0x0a,0xd7,0xc1,0xfe,0x00,0x00]
+
+v_max_i16 v5, 0.5, m0
+// GFX11: encoding: [0x05,0x00,0x0a,0xd7,0xf0,0xfa,0x00,0x00]
+
+v_max_i16 v5, src_scc, vcc_lo
+// GFX11: encoding: [0x05,0x00,0x0a,0xd7,0xfd,0xd4,0x00,0x00]
+
+v_max_i16 v255, 0xfe0b, vcc_hi
+// GFX11: encoding: [0xff,0x00,0x0a,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+
+v_max_u16 v5, v1, v2
+// GFX11: encoding: [0x05,0x00,0x09,0xd7,0x01,0x05,0x02,0x00]
+
+v_max_u16 v5, v255, v255
+// GFX11: encoding: [0x05,0x00,0x09,0xd7,0xff,0xff,0x03,0x00]
+
+v_max_u16 v5, s1, s2
+// GFX11: encoding: [0x05,0x00,0x09,0xd7,0x01,0x04,0x00,0x00]
+
+v_max_u16 v5, s105, s105
+// GFX11: encoding: [0x05,0x00,0x09,0xd7,0x69,0xd2,0x00,0x00]
+
+v_max_u16 v5, vcc_lo, ttmp15
+// GFX11: encoding: [0x05,0x00,0x09,0xd7,0x6a,0xf6,0x00,0x00]
+
+v_max_u16 v5, vcc_hi, 0xfe0b
+// GFX11: encoding: [0x05,0x00,0x09,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+
+v_max_u16 v5, ttmp15, src_scc
+// GFX11: encoding: [0x05,0x00,0x09,0xd7,0x7b,0xfa,0x01,0x00]
+
+v_max_u16 v5, m0, 0.5
+// GFX11: encoding: [0x05,0x00,0x09,0xd7,0x7d,0xe0,0x01,0x00]
+
+v_max_u16 v5, exec_lo, -1
+// GFX11: encoding: [0x05,0x00,0x09,0xd7,0x7e,0x82,0x01,0x00]
+
+v_max_u16 v5, exec_hi, null
+// GFX11: encoding: [0x05,0x00,0x09,0xd7,0x7f,0xf8,0x00,0x00]
+
+v_max_u16 v5, null, exec_lo
+// GFX11: encoding: [0x05,0x00,0x09,0xd7,0x7c,0xfc,0x00,0x00]
+
+v_max_u16 v5, -1, exec_hi
+// GFX11: encoding: [0x05,0x00,0x09,0xd7,0xc1,0xfe,0x00,0x00]
+
+v_max_u16 v5, 0.5, m0
+// GFX11: encoding: [0x05,0x00,0x09,0xd7,0xf0,0xfa,0x00,0x00]
+
+v_max_u16 v5, src_scc, vcc_lo
+// GFX11: encoding: [0x05,0x00,0x09,0xd7,0xfd,0xd4,0x00,0x00]
+
+v_max_u16 v255, 0xfe0b, vcc_hi
+// GFX11: encoding: [0xff,0x00,0x09,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+
+v_maxmin_f16 v5, v1, v2, s3
+// GFX11: encoding: [0x05,0x00,0x60,0xd6,0x01,0x05,0x0e,0x00]
+
+v_maxmin_f16 v5, v255, s2, s105
+// GFX11: encoding: [0x05,0x00,0x60,0xd6,0xff,0x05,0xa4,0x01]
+
+v_maxmin_f16 v5, s1, v255, exec_hi
+// GFX11: encoding: [0x05,0x00,0x60,0xd6,0x01,0xfe,0xff,0x01]
+
+v_maxmin_f16 v5, s105, s105, exec_lo
+// GFX11: encoding: [0x05,0x00,0x60,0xd6,0x69,0xd2,0xf8,0x01]
+
+v_maxmin_f16 v5, vcc_lo, ttmp15, v3
+// GFX11: encoding: [0x05,0x00,0x60,0xd6,0x6a,0xf6,0x0c,0x04]
+
+v_maxmin_f16 v5, vcc_hi, 0xfe0b, v255
+// GFX11: encoding: [0x05,0x00,0x60,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00]
+
+v_maxmin_f16 v5, -|ttmp15|, -|src_scc|, -|ttmp15|
+// GFX11: encoding: [0x05,0x07,0x60,0xd6,0x7b,0xfa,0xed,0xe1]
+
+v_maxmin_f16 v5, m0, 0.5, m0
+// GFX11: encoding: [0x05,0x00,0x60,0xd6,0x7d,0xe0,0xf5,0x01]
+
+v_maxmin_f16 v5, |exec_lo|, -1, vcc_hi
+// GFX11: encoding: [0x05,0x01,0x60,0xd6,0x7e,0x82,0xad,0x01]
+
+v_maxmin_f16 v5, -|exec_hi|, null, -|vcc_lo|
+// GFX11: encoding: [0x05,0x05,0x60,0xd6,0x7f,0xf8,0xa8,0xa1]
+
+v_maxmin_f16 v5, null, exec_lo, -|0xfe0b|
+// GFX11: encoding: [0x05,0x04,0x60,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00]
+
+v_maxmin_f16 v5, -1, -|exec_hi|, -|src_scc|
+// GFX11: encoding: [0x05,0x06,0x60,0xd6,0xc1,0xfe,0xf4,0xc3]
+
+v_maxmin_f16 v5, 0.5, -m0, 0.5 mul:2
+// GFX11: encoding: [0x05,0x00,0x60,0xd6,0xf0,0xfa,0xc0,0x4b]
+
+v_maxmin_f16 v5, -src_scc, |vcc_lo|, -1 mul:4
+// GFX11: encoding: [0x05,0x02,0x60,0xd6,0xfd,0xd4,0x04,0x33]
+
+v_maxmin_f16 v255, -|0xfe0b|, -|vcc_hi|, null clamp div:2
+// GFX11: encoding: [0xff,0x83,0x60,0xd6,0xff,0xd6,0xf0,0x79,0x0b,0xfe,0x00,0x00]
+
+v_maxmin_f32 v5, v1, v2, s3
+// GFX11: encoding: [0x05,0x00,0x5e,0xd6,0x01,0x05,0x0e,0x00]
+
+v_maxmin_f32 v5, v255, s2, s105
+// GFX11: encoding: [0x05,0x00,0x5e,0xd6,0xff,0x05,0xa4,0x01]
+
+v_maxmin_f32 v5, s1, v255, exec_hi
+// GFX11: encoding: [0x05,0x00,0x5e,0xd6,0x01,0xfe,0xff,0x01]
+
+v_maxmin_f32 v5, s105, s105, exec_lo
+// GFX11: encoding: [0x05,0x00,0x5e,0xd6,0x69,0xd2,0xf8,0x01]
+
+v_maxmin_f32 v5, vcc_lo, ttmp15, v3
+// GFX11: encoding: [0x05,0x00,0x5e,0xd6,0x6a,0xf6,0x0c,0x04]
+
+v_maxmin_f32 v5, vcc_hi, 0xaf123456, v255
+// GFX11: encoding: [0x05,0x00,0x5e,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf]
+
+v_maxmin_f32 v5, -|ttmp15|, -|src_scc|, -|ttmp15|
+// GFX11: encoding: [0x05,0x07,0x5e,0xd6,0x7b,0xfa,0xed,0xe1]
+
+v_maxmin_f32 v5, m0, 0.5, m0
+// GFX11: encoding: [0x05,0x00,0x5e,0xd6,0x7d,0xe0,0xf5,0x01]
+
+v_maxmin_f32 v5, |exec_lo|, -1, vcc_hi
+// GFX11: encoding: [0x05,0x01,0x5e,0xd6,0x7e,0x82,0xad,0x01]
+
+v_maxmin_f32 v5, -|exec_hi|, null, -|vcc_lo|
+// GFX11: encoding: [0x05,0x05,0x5e,0xd6,0x7f,0xf8,0xa8,0xa1]
+
+v_maxmin_f32 v5, null, exec_lo, -|0xaf123456|
+// GFX11: encoding: [0x05,0x04,0x5e,0xd6,0x7c,0xfc,0xfc,0x83,0x56,0x34,0x12,0xaf]
+
+v_maxmin_f32 v5, -1, -|exec_hi|, -|src_scc|
+// GFX11: encoding: [0x05,0x06,0x5e,0xd6,0xc1,0xfe,0xf4,0xc3]
+
+v_maxmin_f32 v5, 0.5, -m0, 0.5 mul:2
+// GFX11: encoding: [0x05,0x00,0x5e,0xd6,0xf0,0xfa,0xc0,0x4b]
+
+v_maxmin_f32 v5, -src_scc, |vcc_lo|, -1 mul:4
+// GFX11: encoding: [0x05,0x02,0x5e,0xd6,0xfd,0xd4,0x04,0x33]
+
+v_maxmin_f32 v255, -|0xaf123456|, -|vcc_hi|, null clamp div:2
+// GFX11: encoding: [0xff,0x83,0x5e,0xd6,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf]
+
+v_maxmin_i32 v5, v1, v2, s3
+// GFX11: encoding: [0x05,0x00,0x64,0xd6,0x01,0x05,0x0e,0x00]
+
+v_maxmin_i32 v5, v255, s2, s105
+// GFX11: encoding: [0x05,0x00,0x64,0xd6,0xff,0x05,0xa4,0x01]
+
+v_maxmin_i32 v5, s1, v255, exec_hi
+// GFX11: encoding: [0x05,0x00,0x64,0xd6,0x01,0xfe,0xff,0x01]
+
+v_maxmin_i32 v5, s105, s105, exec_lo
+// GFX11: encoding: [0x05,0x00,0x64,0xd6,0x69,0xd2,0xf8,0x01]
+
+v_maxmin_i32 v5, vcc_lo, ttmp15, v3
+// GFX11: encoding: [0x05,0x00,0x64,0xd6,0x6a,0xf6,0x0c,0x04]
+
+v_maxmin_i32 v5, vcc_hi, 0xaf123456, v255
+// GFX11: encoding: [0x05,0x00,0x64,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf]
+
+v_maxmin_i32 v5, ttmp15, src_scc, ttmp15
+// GFX11: encoding: [0x05,0x00,0x64,0xd6,0x7b,0xfa,0xed,0x01]
+
+v_maxmin_i32 v5, m0, 0.5, m0
+// GFX11: encoding: [0x05,0x00,0x64,0xd6,0x7d,0xe0,0xf5,0x01]
+
+v_maxmin_i32 v5, exec_lo, -1, vcc_hi
+// GFX11: encoding: [0x05,0x00,0x64,0xd6,0x7e,0x82,0xad,0x01]
+
+v_maxmin_i32 v5, exec_hi, null, vcc_lo
+// GFX11: encoding: [0x05,0x00,0x64,0xd6,0x7f,0xf8,0xa8,0x01]
+
+v_maxmin_i32 v5, null, exec_lo, 0xaf123456
+// GFX11: encoding: [0x05,0x00,0x64,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf]
+
+v_maxmin_i32 v5, -1, exec_hi, src_scc
+// GFX11: encoding: [0x05,0x00,0x64,0xd6,0xc1,0xfe,0xf4,0x03]
+
+v_maxmin_i32 v5, 0.5, m0, 0.5
+// GFX11: encoding: [0x05,0x00,0x64,0xd6,0xf0,0xfa,0xc0,0x03]
+
+v_maxmin_i32 v5, src_scc, vcc_lo, -1
+// GFX11: encoding: [0x05,0x00,0x64,0xd6,0xfd,0xd4,0x04,0x03]
+
+v_maxmin_i32 v255, 0xaf123456, vcc_hi, null
+// GFX11: encoding: [0xff,0x00,0x64,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf]
+
+v_maxmin_u32 v5, v1, v2, s3
+// GFX11: encoding: [0x05,0x00,0x62,0xd6,0x01,0x05,0x0e,0x00]
+
+v_maxmin_u32 v5, v255, s2, s105
+// GFX11: encoding: [0x05,0x00,0x62,0xd6,0xff,0x05,0xa4,0x01]
+
+v_maxmin_u32 v5, s1, v255, exec_hi
+// GFX11: encoding: [0x05,0x00,0x62,0xd6,0x01,0xfe,0xff,0x01]
+
+v_maxmin_u32 v5, s105, s105, exec_lo
+// GFX11: encoding: [0x05,0x00,0x62,0xd6,0x69,0xd2,0xf8,0x01]
+
+v_maxmin_u32 v5, vcc_lo, ttmp15, v3
+// GFX11: encoding: [0x05,0x00,0x62,0xd6,0x6a,0xf6,0x0c,0x04]
+
+v_maxmin_u32 v5, vcc_hi, 0xaf123456, v255
+// GFX11: encoding: [0x05,0x00,0x62,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf]
+
+v_maxmin_u32 v5, ttmp15, src_scc, ttmp15
+// GFX11: encoding: [0x05,0x00,0x62,0xd6,0x7b,0xfa,0xed,0x01]
+
+v_maxmin_u32 v5, m0, 0.5, m0
+// GFX11: encoding: [0x05,0x00,0x62,0xd6,0x7d,0xe0,0xf5,0x01]
+
+v_maxmin_u32 v5, exec_lo, -1, vcc_hi
+// GFX11: encoding: [0x05,0x00,0x62,0xd6,0x7e,0x82,0xad,0x01]
+
+v_maxmin_u32 v5, exec_hi, null, vcc_lo
+// GFX11: encoding: [0x05,0x00,0x62,0xd6,0x7f,0xf8,0xa8,0x01]
+
+v_maxmin_u32 v5, null, exec_lo, 0xaf123456
+// GFX11: encoding: [0x05,0x00,0x62,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf]
+
+v_maxmin_u32 v5, -1, exec_hi, src_scc
+// GFX11: encoding: [0x05,0x00,0x62,0xd6,0xc1,0xfe,0xf4,0x03]
+
+v_maxmin_u32 v5, 0.5, m0, 0.5
+// GFX11: encoding: [0x05,0x00,0x62,0xd6,0xf0,0xfa,0xc0,0x03]
+
+v_maxmin_u32 v5, src_scc, vcc_lo, -1
+// GFX11: encoding: [0x05,0x00,0x62,0xd6,0xfd,0xd4,0x04,0x03]
+
+v_maxmin_u32 v255, 0xaf123456, vcc_hi, null
+// GFX11: encoding: [0xff,0x00,0x62,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf]
+
+v_mbcnt_hi_u32_b32 v5, v1, v2
+// GFX11: encoding: [0x05,0x00,0x20,0xd7,0x01,0x05,0x02,0x00]
+
+v_mbcnt_hi_u32_b32 v5, v255, v255
+// GFX11: encoding: [0x05,0x00,0x20,0xd7,0xff,0xff,0x03,0x00]
+
+v_mbcnt_hi_u32_b32 v5, s1, s2
+// GFX11: encoding: [0x05,0x00,0x20,0xd7,0x01,0x04,0x00,0x00]
+
+v_mbcnt_hi_u32_b32 v5, s105, s105
+// GFX11: encoding: [0x05,0x00,0x20,0xd7,0x69,0xd2,0x00,0x00]
+
+v_mbcnt_hi_u32_b32 v5, vcc_lo, ttmp15
+// GFX11: encoding: [0x05,0x00,0x20,0xd7,0x6a,0xf6,0x00,0x00]
+
+v_mbcnt_hi_u32_b32 v5, vcc_hi, 0xaf123456
+// GFX11: encoding: [0x05,0x00,0x20,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+
+v_mbcnt_hi_u32_b32 v5, ttmp15, src_scc
+// GFX11: encoding: [0x05,0x00,0x20,0xd7,0x7b,0xfa,0x01,0x00]
+
+v_mbcnt_hi_u32_b32 v5, m0, 0.5
+// GFX11: encoding: [0x05,0x00,0x20,0xd7,0x7d,0xe0,0x01,0x00]
+
+v_mbcnt_hi_u32_b32 v5, exec_lo, -1
+// GFX11: encoding: [0x05,0x00,0x20,0xd7,0x7e,0x82,0x01,0x00]
+
+v_mbcnt_hi_u32_b32 v5, exec_hi, null
+// GFX11: encoding: [0x05,0x00,0x20,0xd7,0x7f,0xf8,0x00,0x00]
+
+v_mbcnt_hi_u32_b32 v5, null, exec_lo
+// GFX11: encoding: [0x05,0x00,0x20,0xd7,0x7c,0xfc,0x00,0x00]
+
+v_mbcnt_hi_u32_b32 v5, -1, exec_hi
+// GFX11: encoding: [0x05,0x00,0x20,0xd7,0xc1,0xfe,0x00,0x00]
+
+v_mbcnt_hi_u32_b32 v5, 0.5, m0
+// GFX11: encoding: [0x05,0x00,0x20,0xd7,0xf0,0xfa,0x00,0x00]
+
+v_mbcnt_hi_u32_b32 v5, src_scc, vcc_lo
+// GFX11: encoding: [0x05,0x00,0x20,0xd7,0xfd,0xd4,0x00,0x00]
+
+v_mbcnt_hi_u32_b32 v255, 0xaf123456, vcc_hi
+// GFX11: encoding: [0xff,0x00,0x20,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf]
+
+v_mbcnt_lo_u32_b32 v5, v1, v2
+// GFX11: encoding: [0x05,0x00,0x1f,0xd7,0x01,0x05,0x02,0x00]
+
+v_mbcnt_lo_u32_b32 v5, v255, v255
+// GFX11: encoding: [0x05,0x00,0x1f,0xd7,0xff,0xff,0x03,0x00]
+
+v_mbcnt_lo_u32_b32 v5, s1, s2
+// GFX11: encoding: [0x05,0x00,0x1f,0xd7,0x01,0x04,0x00,0x00]
+
+v_mbcnt_lo_u32_b32 v5, s105, s105
+// GFX11: encoding: [0x05,0x00,0x1f,0xd7,0x69,0xd2,0x00,0x00]
+
+v_mbcnt_lo_u32_b32 v5, vcc_lo, ttmp15
+// GFX11: encoding: [0x05,0x00,0x1f,0xd7,0x6a,0xf6,0x00,0x00]
+
+v_mbcnt_lo_u32_b32 v5, vcc_hi, 0xaf123456
+// GFX11: encoding: [0x05,0x00,0x1f,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+
+v_mbcnt_lo_u32_b32 v5, ttmp15, src_scc
+// GFX11: encoding: [0x05,0x00,0x1f,0xd7,0x7b,0xfa,0x01,0x00]
+
+v_mbcnt_lo_u32_b32 v5, m0, 0.5
+// GFX11: encoding: [0x05,0x00,0x1f,0xd7,0x7d,0xe0,0x01,0x00]
+
+v_mbcnt_lo_u32_b32 v5, exec_lo, -1
+// GFX11: encoding: [0x05,0x00,0x1f,0xd7,0x7e,0x82,0x01,0x00]
+
+v_mbcnt_lo_u32_b32 v5, exec_hi, null
+// GFX11: encoding: [0x05,0x00,0x1f,0xd7,0x7f,0xf8,0x00,0x00]
+
+v_mbcnt_lo_u32_b32 v5, null, exec_lo
+// GFX11: encoding: [0x05,0x00,0x1f,0xd7,0x7c,0xfc,0x00,0x00]
+
+v_mbcnt_lo_u32_b32 v5, -1, exec_hi
+// GFX11: encoding: [0x05,0x00,0x1f,0xd7,0xc1,0xfe,0x00,0x00]
+
+v_mbcnt_lo_u32_b32 v5, 0.5, m0
+// GFX11: encoding: [0x05,0x00,0x1f,0xd7,0xf0,0xfa,0x00,0x00]
+
+v_mbcnt_lo_u32_b32 v5, src_scc, vcc_lo
+// GFX11: encoding: [0x05,0x00,0x1f,0xd7,0xfd,0xd4,0x00,0x00]
+
+v_mbcnt_lo_u32_b32 v255, 0xaf123456, vcc_hi
+// GFX11: encoding: [0xff,0x00,0x1f,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf]
+
+v_med3_f16 v5, v1, v2, s3
+// GFX11: encoding: [0x05,0x00,0x4f,0xd6,0x01,0x05,0x0e,0x00]
+
+v_med3_f16 v5, v255, s2, s105
+// GFX11: encoding: [0x05,0x00,0x4f,0xd6,0xff,0x05,0xa4,0x01]
+
+v_med3_f16 v5, s1, v255, exec_hi
+// GFX11: encoding: [0x05,0x00,0x4f,0xd6,0x01,0xfe,0xff,0x01]
+
+v_med3_f16 v5, s105, s105, exec_lo
+// GFX11: encoding: [0x05,0x00,0x4f,0xd6,0x69,0xd2,0xf8,0x01]
+
+v_med3_f16 v5, vcc_lo, ttmp15, v3
+// GFX11: encoding: [0x05,0x00,0x4f,0xd6,0x6a,0xf6,0x0c,0x04]
+
+v_med3_f16 v5, vcc_hi, 0xfe0b, v255
+// GFX11: encoding: [0x05,0x00,0x4f,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00]
+
+v_med3_f16 v5, -|ttmp15|, -|src_scc|, -|ttmp15|
+// GFX11: encoding: [0x05,0x07,0x4f,0xd6,0x7b,0xfa,0xed,0xe1]
+
+v_med3_f16 v5, m0, 0.5, m0
+// GFX11: encoding: [0x05,0x00,0x4f,0xd6,0x7d,0xe0,0xf5,0x01]
+
+v_med3_f16 v5, |exec_lo|, -1, vcc_hi
+// GFX11: encoding: [0x05,0x01,0x4f,0xd6,0x7e,0x82,0xad,0x01]
+
+v_med3_f16 v5, -|exec_hi|, null, -|vcc_lo| op_sel:[1,1,1,1]
+// GFX11: encoding: [0x05,0x7d,0x4f,0xd6,0x7f,0xf8,0xa8,0xa1]
+
+v_med3_f16 v5, null, exec_lo, -|0xfe0b| op_sel:[0,0,0,0]
+// GFX11: encoding: [0x05,0x04,0x4f,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00]
+
+v_med3_f16 v5, -1, -|exec_hi|, -|src_scc| op_sel:[1,0,0,0]
+// GFX11: encoding: [0x05,0x0e,0x4f,0xd6,0xc1,0xfe,0xf4,0xc3]
+
+v_med3_f16 v5, 0.5, -m0, 0.5 op_sel:[0,1,0,0]
+// GFX11: encoding: [0x05,0x10,0x4f,0xd6,0xf0,0xfa,0xc0,0x43]
+
+v_med3_f16 v5, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0]
+// GFX11: encoding: [0x05,0x22,0x4f,0xd6,0xfd,0xd4,0x04,0x23]
+
+v_med3_f16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp
+// GFX11: encoding: [0xff,0xc3,0x4f,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00]
+
+v_med3_f16 v5, 0.5, -m0, 0.5 op_sel:[0,1,0,0] div:2
+// GFX11: encoding: [0x05,0x10,0x4f,0xd6,0xf0,0xfa,0xc0,0x5b]
+
+v_med3_f32 v5, v1, v2, s3
+// GFX11: encoding: [0x05,0x00,0x1f,0xd6,0x01,0x05,0x0e,0x00]
+
+v_med3_f32 v5, v255, s2, s105
+// GFX11: encoding: [0x05,0x00,0x1f,0xd6,0xff,0x05,0xa4,0x01]
+
+v_med3_f32 v5, s1, v255, exec_hi
+// GFX11: encoding: [0x05,0x00,0x1f,0xd6,0x01,0xfe,0xff,0x01]
+
+v_med3_f32 v5, s105, s105, exec_lo
+// GFX11: encoding: [0x05,0x00,0x1f,0xd6,0x69,0xd2,0xf8,0x01]
+
+v_med3_f32 v5, vcc_lo, ttmp15, v3
+// GFX11: encoding: [0x05,0x00,0x1f,0xd6,0x6a,0xf6,0x0c,0x04]
+
+v_med3_f32 v5, vcc_hi, 0xaf123456, v255
+// GFX11: encoding: [0x05,0x00,0x1f,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf]
+
+v_med3_f32 v5, -|ttmp15|, -|src_scc|, -|ttmp15|
+// GFX11: encoding: [0x05,0x07,0x1f,0xd6,0x7b,0xfa,0xed,0xe1]
+
+v_med3_f32 v5, m0, 0.5, m0
+// GFX11: encoding: [0x05,0x00,0x1f,0xd6,0x7d,0xe0,0xf5,0x01]
+
+v_med3_f32 v5, |exec_lo|, -1, vcc_hi
+// GFX11: encoding: [0x05,0x01,0x1f,0xd6,0x7e,0x82,0xad,0x01]
+
+v_med3_f32 v5, -|exec_hi|, null, -|vcc_lo|
+// GFX11: encoding: [0x05,0x05,0x1f,0xd6,0x7f,0xf8,0xa8,0xa1]
+
+v_med3_f32 v5, null, exec_lo, -|0xaf123456|
+// GFX11: encoding: [0x05,0x04,0x1f,0xd6,0x7c,0xfc,0xfc,0x83,0x56,0x34,0x12,0xaf]
+
+v_med3_f32 v5, -1, -|exec_hi|, -|src_scc|
+// GFX11: encoding: [0x05,0x06,0x1f,0xd6,0xc1,0xfe,0xf4,0xc3]
+
+v_med3_f32 v5, 0.5, -m0, 0.5 mul:2
+// GFX11: encoding: [0x05,0x00,0x1f,0xd6,0xf0,0xfa,0xc0,0x4b]
+
+v_med3_f32 v5, -src_scc, |vcc_lo|, -1 mul:4
+// GFX11: encoding: [0x05,0x02,0x1f,0xd6,0xfd,0xd4,0x04,0x33]
+
+v_med3_f32 v255, -|0xaf123456|, -|vcc_hi|, null clamp div:2
+// GFX11: encoding: [0xff,0x83,0x1f,0xd6,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf]
+
+v_med3_i16 v5, v1, v2, s3
+// GFX11: encoding: [0x05,0x00,0x50,0xd6,0x01,0x05,0x0e,0x00]
+
+v_med3_i16 v5, v255, s2, s105
+// GFX11: encoding: [0x05,0x00,0x50,0xd6,0xff,0x05,0xa4,0x01]
+
+v_med3_i16 v5, s1, v255, exec_hi
+// GFX11: encoding: [0x05,0x00,0x50,0xd6,0x01,0xfe,0xff,0x01]
+
+v_med3_i16 v5, s105, s105, exec_lo
+// GFX11: encoding: [0x05,0x00,0x50,0xd6,0x69,0xd2,0xf8,0x01]
+
+v_med3_i16 v5, vcc_lo, ttmp15, v3
+// GFX11: encoding: [0x05,0x00,0x50,0xd6,0x6a,0xf6,0x0c,0x04]
+
+v_med3_i16 v5, vcc_hi, 0xfe0b, v255
+// GFX11: encoding: [0x05,0x00,0x50,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00]
+
+v_med3_i16 v5, ttmp15, src_scc, ttmp15
+// GFX11: encoding: [0x05,0x00,0x50,0xd6,0x7b,0xfa,0xed,0x01]
+
+v_med3_i16 v5, m0, 0.5, m0
+// GFX11: encoding: [0x05,0x00,0x50,0xd6,0x7d,0xe0,0xf5,0x01]
+
+v_med3_i16 v5, exec_lo, -1, vcc_hi
+// GFX11: encoding: [0x05,0x00,0x50,0xd6,0x7e,0x82,0xad,0x01]
+
+v_med3_i16 v5, exec_hi, null, vcc_lo op_sel:[1,1,1,1]
+// GFX11: encoding: [0x05,0x78,0x50,0xd6,0x7f,0xf8,0xa8,0x01]
+
+v_med3_i16 v5, null, exec_lo, 0xfe0b op_sel:[0,0,0,0]
+// GFX11: encoding: [0x05,0x00,0x50,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00]
+
+v_med3_i16 v5, -1, exec_hi, src_scc op_sel:[1,0,0,0]
+// GFX11: encoding: [0x05,0x08,0x50,0xd6,0xc1,0xfe,0xf4,0x03]
+
+v_med3_i16 v5, 0.5, m0, 0.5 op_sel:[0,1,0,0]
+// GFX11: encoding: [0x05,0x10,0x50,0xd6,0xf0,0xfa,0xc0,0x03]
+
+v_med3_i16 v5, src_scc, vcc_lo, -1 op_sel:[0,0,1,0]
+// GFX11: encoding: [0x05,0x20,0x50,0xd6,0xfd,0xd4,0x04,0x03]
+
+v_med3_i16 v255, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1]
+// GFX11: encoding: [0xff,0x40,0x50,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00]
+
+v_med3_i32 v5, v1, v2, s3
+// GFX11: encoding: [0x05,0x00,0x20,0xd6,0x01,0x05,0x0e,0x00]
+
+v_med3_i32 v5, v255, s2, s105
+// GFX11: encoding: [0x05,0x00,0x20,0xd6,0xff,0x05,0xa4,0x01]
+
+v_med3_i32 v5, s1, v255, exec_hi
+// GFX11: encoding: [0x05,0x00,0x20,0xd6,0x01,0xfe,0xff,0x01]
+
+v_med3_i32 v5, s105, s105, exec_lo
+// GFX11: encoding: [0x05,0x00,0x20,0xd6,0x69,0xd2,0xf8,0x01]
+
+v_med3_i32 v5, vcc_lo, ttmp15, v3
+// GFX11: encoding: [0x05,0x00,0x20,0xd6,0x6a,0xf6,0x0c,0x04]
+
+v_med3_i32 v5, vcc_hi, 0xaf123456, v255
+// GFX11: encoding: [0x05,0x00,0x20,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf]
+
+v_med3_i32 v5, ttmp15, src_scc, ttmp15
+// GFX11: encoding: [0x05,0x00,0x20,0xd6,0x7b,0xfa,0xed,0x01]
+
+v_med3_i32 v5, m0, 0.5, m0
+// GFX11: encoding: [0x05,0x00,0x20,0xd6,0x7d,0xe0,0xf5,0x01]
+
+v_med3_i32 v5, exec_lo, -1, vcc_hi
+// GFX11: encoding: [0x05,0x00,0x20,0xd6,0x7e,0x82,0xad,0x01]
+
+v_med3_i32 v5, exec_hi, null, vcc_lo
+// GFX11: encoding: [0x05,0x00,0x20,0xd6,0x7f,0xf8,0xa8,0x01]
+
+v_med3_i32 v5, null, exec_lo, 0xaf123456
+// GFX11: encoding: [0x05,0x00,0x20,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf]
+
+v_med3_i32 v5, -1, exec_hi, src_scc
+// GFX11: encoding: [0x05,0x00,0x20,0xd6,0xc1,0xfe,0xf4,0x03]
+
+v_med3_i32 v5, 0.5, m0, 0.5
+// GFX11: encoding: [0x05,0x00,0x20,0xd6,0xf0,0xfa,0xc0,0x03]
+
+v_med3_i32 v5, src_scc, vcc_lo, -1
+// GFX11: encoding: [0x05,0x00,0x20,0xd6,0xfd,0xd4,0x04,0x03]
+
+v_med3_i32 v255, 0xaf123456, vcc_hi, null
+// GFX11: encoding: [0xff,0x00,0x20,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf]
+
+v_med3_u16 v5, v1, v2, s3
+// GFX11: encoding: [0x05,0x00,0x51,0xd6,0x01,0x05,0x0e,0x00]
+
+v_med3_u16 v5, v255, s2, s105
+// GFX11: encoding: [0x05,0x00,0x51,0xd6,0xff,0x05,0xa4,0x01]
+
+v_med3_u16 v5, s1, v255, exec_hi
+// GFX11: encoding: [0x05,0x00,0x51,0xd6,0x01,0xfe,0xff,0x01]
+
+v_med3_u16 v5, s105, s105, exec_lo
+// GFX11: encoding: [0x05,0x00,0x51,0xd6,0x69,0xd2,0xf8,0x01]
+
+v_med3_u16 v5, vcc_lo, ttmp15, v3
+// GFX11: encoding: [0x05,0x00,0x51,0xd6,0x6a,0xf6,0x0c,0x04]
+
+v_med3_u16 v5, vcc_hi, 0xfe0b, v255
+// GFX11: encoding: [0x05,0x00,0x51,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00]
+
+v_med3_u16 v5, ttmp15, src_scc, ttmp15
+// GFX11: encoding: [0x05,0x00,0x51,0xd6,0x7b,0xfa,0xed,0x01]
+
+v_med3_u16 v5, m0, 0.5, m0
+// GFX11: encoding: [0x05,0x00,0x51,0xd6,0x7d,0xe0,0xf5,0x01]
+
+v_med3_u16 v5, exec_lo, -1, vcc_hi
+// GFX11: encoding: [0x05,0x00,0x51,0xd6,0x7e,0x82,0xad,0x01]
+
+v_med3_u16 v5, exec_hi, null, vcc_lo op_sel:[1,1,1,1]
+// GFX11: encoding: [0x05,0x78,0x51,0xd6,0x7f,0xf8,0xa8,0x01]
+
+v_med3_u16 v5, null, exec_lo, 0xfe0b op_sel:[0,0,0,0]
+// GFX11: encoding: [0x05,0x00,0x51,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00]
+
+v_med3_u16 v5, -1, exec_hi, src_scc op_sel:[1,0,0,0]
+// GFX11: encoding: [0x05,0x08,0x51,0xd6,0xc1,0xfe,0xf4,0x03]
+
+v_med3_u16 v5, 0.5, m0, 0.5 op_sel:[0,1,0,0]
+// GFX11: encoding: [0x05,0x10,0x51,0xd6,0xf0,0xfa,0xc0,0x03]
+
+v_med3_u16 v5, src_scc, vcc_lo, -1 op_sel:[0,0,1,0]
+// GFX11: encoding: [0x05,0x20,0x51,0xd6,0xfd,0xd4,0x04,0x03]
+
+v_med3_u16 v255, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1]
+// GFX11: encoding: [0xff,0x40,0x51,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00]
+
+v_med3_u32 v5, v1, v2, s3
+// GFX11: encoding: [0x05,0x00,0x21,0xd6,0x01,0x05,0x0e,0x00]
+
+v_med3_u32 v5, v255, s2, s105
+// GFX11: encoding: [0x05,0x00,0x21,0xd6,0xff,0x05,0xa4,0x01]
+
+v_med3_u32 v5, s1, v255, exec_hi
+// GFX11: encoding: [0x05,0x00,0x21,0xd6,0x01,0xfe,0xff,0x01]
+
+v_med3_u32 v5, s105, s105, exec_lo
+// GFX11: encoding: [0x05,0x00,0x21,0xd6,0x69,0xd2,0xf8,0x01]
+
+v_med3_u32 v5, vcc_lo, ttmp15, v3
+// GFX11: encoding: [0x05,0x00,0x21,0xd6,0x6a,0xf6,0x0c,0x04]
+
+v_med3_u32 v5, vcc_hi, 0xaf123456, v255
+// GFX11: encoding: [0x05,0x00,0x21,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf]
+
+v_med3_u32 v5, ttmp15, src_scc, ttmp15
+// GFX11: encoding: [0x05,0x00,0x21,0xd6,0x7b,0xfa,0xed,0x01]
+
+v_med3_u32 v5, m0, 0.5, m0
+// GFX11: encoding: [0x05,0x00,0x21,0xd6,0x7d,0xe0,0xf5,0x01]
+
+v_med3_u32 v5, exec_lo, -1, vcc_hi
+// GFX11: encoding: [0x05,0x00,0x21,0xd6,0x7e,0x82,0xad,0x01]
+
+v_med3_u32 v5, exec_hi, null, vcc_lo
+// GFX11: encoding: [0x05,0x00,0x21,0xd6,0x7f,0xf8,0xa8,0x01]
+
+v_med3_u32 v5, null, exec_lo, 0xaf123456
+// GFX11: encoding: [0x05,0x00,0x21,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf]
+
+v_med3_u32 v5, -1, exec_hi, src_scc
+// GFX11: encoding: [0x05,0x00,0x21,0xd6,0xc1,0xfe,0xf4,0x03]
+
+v_med3_u32 v5, 0.5, m0, 0.5
+// GFX11: encoding: [0x05,0x00,0x21,0xd6,0xf0,0xfa,0xc0,0x03]
+
+v_med3_u32 v5, src_scc, vcc_lo, -1
+// GFX11: encoding: [0x05,0x00,0x21,0xd6,0xfd,0xd4,0x04,0x03]
+
+v_med3_u32 v255, 0xaf123456, vcc_hi, null
+// GFX11: encoding: [0xff,0x00,0x21,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf]
+
+v_min3_f16 v5, v1, v2, s3
+// GFX11: encoding: [0x05,0x00,0x49,0xd6,0x01,0x05,0x0e,0x00]
+
+v_min3_f16 v5, v255, s2, s105
+// GFX11: encoding: [0x05,0x00,0x49,0xd6,0xff,0x05,0xa4,0x01]
+
+v_min3_f16 v5, s1, v255, exec_hi
+// GFX11: encoding: [0x05,0x00,0x49,0xd6,0x01,0xfe,0xff,0x01]
+
+v_min3_f16 v5, s105, s105, exec_lo
+// GFX11: encoding: [0x05,0x00,0x49,0xd6,0x69,0xd2,0xf8,0x01]
+
+v_min3_f16 v5, vcc_lo, ttmp15, v3
+// GFX11: encoding: [0x05,0x00,0x49,0xd6,0x6a,0xf6,0x0c,0x04]
+
+v_min3_f16 v5, vcc_hi, 0xfe0b, v255
+// GFX11: encoding: [0x05,0x00,0x49,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00]
+
+v_min3_f16 v5, -|ttmp15|, -|src_scc|, -|ttmp15|
+// GFX11: encoding: [0x05,0x07,0x49,0xd6,0x7b,0xfa,0xed,0xe1]
+
+v_min3_f16 v5, m0, 0.5, m0
+// GFX11: encoding: [0x05,0x00,0x49,0xd6,0x7d,0xe0,0xf5,0x01]
+
+v_min3_f16 v5, |exec_lo|, -1, vcc_hi
+// GFX11: encoding: [0x05,0x01,0x49,0xd6,0x7e,0x82,0xad,0x01]
+
+v_min3_f16 v5, -|exec_hi|, null, -|vcc_lo| op_sel:[1,1,1,1]
+// GFX11: encoding: [0x05,0x7d,0x49,0xd6,0x7f,0xf8,0xa8,0xa1]
+
+v_min3_f16 v5, null, exec_lo, -|0xfe0b| op_sel:[0,0,0,0]
+// GFX11: encoding: [0x05,0x04,0x49,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00]
+
+v_min3_f16 v5, -1, -|exec_hi|, -|src_scc| op_sel:[1,0,0,0]
+// GFX11: encoding: [0x05,0x0e,0x49,0xd6,0xc1,0xfe,0xf4,0xc3]
+
+v_min3_f16 v5, 0.5, -m0, 0.5 op_sel:[0,1,0,0]
+// GFX11: encoding: [0x05,0x10,0x49,0xd6,0xf0,0xfa,0xc0,0x43]
+
+v_min3_f16 v5, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0]
+// GFX11: encoding: [0x05,0x22,0x49,0xd6,0xfd,0xd4,0x04,0x23]
+
+v_min3_f16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp
+// GFX11: encoding: [0xff,0xc3,0x49,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00]
+
+v_min3_f16 v5, m0, 0.5, m0 clamp mul:4
+// GFX11: encoding: [0x05,0x80,0x49,0xd6,0x7d,0xe0,0xf5,0x11]
+
+v_min3_f32 v5, v1, v2, s3
+// GFX11: encoding: [0x05,0x00,0x19,0xd6,0x01,0x05,0x0e,0x00]
+
+v_min3_f32 v5, v255, s2, s105
+// GFX11: encoding: [0x05,0x00,0x19,0xd6,0xff,0x05,0xa4,0x01]
+
+v_min3_f32 v5, s1, v255, exec_hi
+// GFX11: encoding: [0x05,0x00,0x19,0xd6,0x01,0xfe,0xff,0x01]
+
+v_min3_f32 v5, s105, s105, exec_lo
+// GFX11: encoding: [0x05,0x00,0x19,0xd6,0x69,0xd2,0xf8,0x01]
+
+v_min3_f32 v5, vcc_lo, ttmp15, v3
+// GFX11: encoding: [0x05,0x00,0x19,0xd6,0x6a,0xf6,0x0c,0x04]
+
+v_min3_f32 v5, vcc_hi, 0xaf123456, v255
+// GFX11: encoding: [0x05,0x00,0x19,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf]
+
+v_min3_f32 v5, -|ttmp15|, -|src_scc|, -|ttmp15|
+// GFX11: encoding: [0x05,0x07,0x19,0xd6,0x7b,0xfa,0xed,0xe1]
+
+v_min3_f32 v5, m0, 0.5, m0
+// GFX11: encoding: [0x05,0x00,0x19,0xd6,0x7d,0xe0,0xf5,0x01]
+ +v_min3_f32 v5, |exec_lo|, -1, vcc_hi +// GFX11: encoding: [0x05,0x01,0x19,0xd6,0x7e,0x82,0xad,0x01] + +v_min3_f32 v5, -|exec_hi|, null, -|vcc_lo| +// GFX11: encoding: [0x05,0x05,0x19,0xd6,0x7f,0xf8,0xa8,0xa1] + +v_min3_f32 v5, null, exec_lo, -|0xaf123456| +// GFX11: encoding: [0x05,0x04,0x19,0xd6,0x7c,0xfc,0xfc,0x83,0x56,0x34,0x12,0xaf] + +v_min3_f32 v5, -1, -|exec_hi|, -|src_scc| +// GFX11: encoding: [0x05,0x06,0x19,0xd6,0xc1,0xfe,0xf4,0xc3] + +v_min3_f32 v5, 0.5, -m0, 0.5 mul:2 +// GFX11: encoding: [0x05,0x00,0x19,0xd6,0xf0,0xfa,0xc0,0x4b] + +v_min3_f32 v5, -src_scc, |vcc_lo|, -1 mul:4 +// GFX11: encoding: [0x05,0x02,0x19,0xd6,0xfd,0xd4,0x04,0x33] + +v_min3_f32 v255, -|0xaf123456|, -|vcc_hi|, null clamp div:2 +// GFX11: encoding: [0xff,0x83,0x19,0xd6,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf] + +v_min3_i16 v5, v1, v2, s3 +// GFX11: encoding: [0x05,0x00,0x4a,0xd6,0x01,0x05,0x0e,0x00] + +v_min3_i16 v5, v255, s2, s105 +// GFX11: encoding: [0x05,0x00,0x4a,0xd6,0xff,0x05,0xa4,0x01] + +v_min3_i16 v5, s1, v255, exec_hi +// GFX11: encoding: [0x05,0x00,0x4a,0xd6,0x01,0xfe,0xff,0x01] + +v_min3_i16 v5, s105, s105, exec_lo +// GFX11: encoding: [0x05,0x00,0x4a,0xd6,0x69,0xd2,0xf8,0x01] + +v_min3_i16 v5, vcc_lo, ttmp15, v3 +// GFX11: encoding: [0x05,0x00,0x4a,0xd6,0x6a,0xf6,0x0c,0x04] + +v_min3_i16 v5, vcc_hi, 0xfe0b, v255 +// GFX11: encoding: [0x05,0x00,0x4a,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] + +v_min3_i16 v5, ttmp15, src_scc, ttmp15 +// GFX11: encoding: [0x05,0x00,0x4a,0xd6,0x7b,0xfa,0xed,0x01] + +v_min3_i16 v5, m0, 0.5, m0 +// GFX11: encoding: [0x05,0x00,0x4a,0xd6,0x7d,0xe0,0xf5,0x01] + +v_min3_i16 v5, exec_lo, -1, vcc_hi +// GFX11: encoding: [0x05,0x00,0x4a,0xd6,0x7e,0x82,0xad,0x01] + +v_min3_i16 v5, exec_hi, null, vcc_lo op_sel:[1,1,1,1] +// GFX11: encoding: [0x05,0x78,0x4a,0xd6,0x7f,0xf8,0xa8,0x01] + +v_min3_i16 v5, null, exec_lo, 0xfe0b op_sel:[0,0,0,0] +// GFX11: encoding: [0x05,0x00,0x4a,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00] + +v_min3_i16 v5, -1, exec_hi, src_scc op_sel:[1,0,0,0] +// GFX11: encoding: [0x05,0x08,0x4a,0xd6,0xc1,0xfe,0xf4,0x03] + +v_min3_i16 v5, 0.5, m0, 0.5 op_sel:[0,1,0,0] +// GFX11: encoding: [0x05,0x10,0x4a,0xd6,0xf0,0xfa,0xc0,0x03] + +v_min3_i16 v5, src_scc, vcc_lo, -1 op_sel:[0,0,1,0] +// GFX11: encoding: [0x05,0x20,0x4a,0xd6,0xfd,0xd4,0x04,0x03] + +v_min3_i16 v255, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] +// GFX11: encoding: [0xff,0x40,0x4a,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] + +v_min3_i32 v5, v1, v2, s3 +// GFX11: encoding: [0x05,0x00,0x1a,0xd6,0x01,0x05,0x0e,0x00] + +v_min3_i32 v5, v255, s2, s105 +// GFX11: encoding: [0x05,0x00,0x1a,0xd6,0xff,0x05,0xa4,0x01] + +v_min3_i32 v5, s1, v255, exec_hi +// GFX11: encoding: [0x05,0x00,0x1a,0xd6,0x01,0xfe,0xff,0x01] + +v_min3_i32 v5, s105, s105, exec_lo +// GFX11: encoding: [0x05,0x00,0x1a,0xd6,0x69,0xd2,0xf8,0x01] + +v_min3_i32 v5, vcc_lo, ttmp15, v3 +// GFX11: encoding: [0x05,0x00,0x1a,0xd6,0x6a,0xf6,0x0c,0x04] + +v_min3_i32 v5, vcc_hi, 0xaf123456, v255 +// GFX11: encoding: [0x05,0x00,0x1a,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] + +v_min3_i32 v5, ttmp15, src_scc, ttmp15 +// GFX11: encoding: [0x05,0x00,0x1a,0xd6,0x7b,0xfa,0xed,0x01] + +v_min3_i32 v5, m0, 0.5, m0 +// GFX11: encoding: [0x05,0x00,0x1a,0xd6,0x7d,0xe0,0xf5,0x01] + +v_min3_i32 v5, exec_lo, -1, vcc_hi +// GFX11: encoding: [0x05,0x00,0x1a,0xd6,0x7e,0x82,0xad,0x01] + +v_min3_i32 v5, exec_hi, null, vcc_lo +// GFX11: encoding: [0x05,0x00,0x1a,0xd6,0x7f,0xf8,0xa8,0x01] + +v_min3_i32 v5, null, exec_lo, 0xaf123456 +// GFX11: encoding: 
[0x05,0x00,0x1a,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf] + +v_min3_i32 v5, -1, exec_hi, src_scc +// GFX11: encoding: [0x05,0x00,0x1a,0xd6,0xc1,0xfe,0xf4,0x03] + +v_min3_i32 v5, 0.5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x1a,0xd6,0xf0,0xfa,0xc0,0x03] + +v_min3_i32 v5, src_scc, vcc_lo, -1 +// GFX11: encoding: [0x05,0x00,0x1a,0xd6,0xfd,0xd4,0x04,0x03] + +v_min3_i32 v255, 0xaf123456, vcc_hi, null +// GFX11: encoding: [0xff,0x00,0x1a,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] + +v_min3_u16 v5, v1, v2, s3 +// GFX11: encoding: [0x05,0x00,0x4b,0xd6,0x01,0x05,0x0e,0x00] + +v_min3_u16 v5, v255, s2, s105 +// GFX11: encoding: [0x05,0x00,0x4b,0xd6,0xff,0x05,0xa4,0x01] + +v_min3_u16 v5, s1, v255, exec_hi +// GFX11: encoding: [0x05,0x00,0x4b,0xd6,0x01,0xfe,0xff,0x01] + +v_min3_u16 v5, s105, s105, exec_lo +// GFX11: encoding: [0x05,0x00,0x4b,0xd6,0x69,0xd2,0xf8,0x01] + +v_min3_u16 v5, vcc_lo, ttmp15, v3 +// GFX11: encoding: [0x05,0x00,0x4b,0xd6,0x6a,0xf6,0x0c,0x04] + +v_min3_u16 v5, vcc_hi, 0xfe0b, v255 +// GFX11: encoding: [0x05,0x00,0x4b,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] + +v_min3_u16 v5, ttmp15, src_scc, ttmp15 +// GFX11: encoding: [0x05,0x00,0x4b,0xd6,0x7b,0xfa,0xed,0x01] + +v_min3_u16 v5, m0, 0.5, m0 +// GFX11: encoding: [0x05,0x00,0x4b,0xd6,0x7d,0xe0,0xf5,0x01] + +v_min3_u16 v5, exec_lo, -1, vcc_hi +// GFX11: encoding: [0x05,0x00,0x4b,0xd6,0x7e,0x82,0xad,0x01] + +v_min3_u16 v5, exec_hi, null, vcc_lo op_sel:[1,1,1,1] +// GFX11: encoding: [0x05,0x78,0x4b,0xd6,0x7f,0xf8,0xa8,0x01] + +v_min3_u16 v5, null, exec_lo, 0xfe0b op_sel:[0,0,0,0] +// GFX11: encoding: [0x05,0x00,0x4b,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00] + +v_min3_u16 v5, -1, exec_hi, src_scc op_sel:[1,0,0,0] +// GFX11: encoding: [0x05,0x08,0x4b,0xd6,0xc1,0xfe,0xf4,0x03] + +v_min3_u16 v5, 0.5, m0, 0.5 op_sel:[0,1,0,0] +// GFX11: encoding: [0x05,0x10,0x4b,0xd6,0xf0,0xfa,0xc0,0x03] + +v_min3_u16 v5, src_scc, vcc_lo, -1 op_sel:[0,0,1,0] +// GFX11: encoding: [0x05,0x20,0x4b,0xd6,0xfd,0xd4,0x04,0x03] + +v_min3_u16 v255, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] +// GFX11: encoding: [0xff,0x40,0x4b,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] + +v_min3_u32 v5, v1, v2, s3 +// GFX11: encoding: [0x05,0x00,0x1b,0xd6,0x01,0x05,0x0e,0x00] + +v_min3_u32 v5, v255, s2, s105 +// GFX11: encoding: [0x05,0x00,0x1b,0xd6,0xff,0x05,0xa4,0x01] + +v_min3_u32 v5, s1, v255, exec_hi +// GFX11: encoding: [0x05,0x00,0x1b,0xd6,0x01,0xfe,0xff,0x01] + +v_min3_u32 v5, s105, s105, exec_lo +// GFX11: encoding: [0x05,0x00,0x1b,0xd6,0x69,0xd2,0xf8,0x01] + +v_min3_u32 v5, vcc_lo, ttmp15, v3 +// GFX11: encoding: [0x05,0x00,0x1b,0xd6,0x6a,0xf6,0x0c,0x04] + +v_min3_u32 v5, vcc_hi, 0xaf123456, v255 +// GFX11: encoding: [0x05,0x00,0x1b,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] + +v_min3_u32 v5, ttmp15, src_scc, ttmp15 +// GFX11: encoding: [0x05,0x00,0x1b,0xd6,0x7b,0xfa,0xed,0x01] + +v_min3_u32 v5, m0, 0.5, m0 +// GFX11: encoding: [0x05,0x00,0x1b,0xd6,0x7d,0xe0,0xf5,0x01] + +v_min3_u32 v5, exec_lo, -1, vcc_hi +// GFX11: encoding: [0x05,0x00,0x1b,0xd6,0x7e,0x82,0xad,0x01] + +v_min3_u32 v5, exec_hi, null, vcc_lo +// GFX11: encoding: [0x05,0x00,0x1b,0xd6,0x7f,0xf8,0xa8,0x01] + +v_min3_u32 v5, null, exec_lo, 0xaf123456 +// GFX11: encoding: [0x05,0x00,0x1b,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf] + +v_min3_u32 v5, -1, exec_hi, src_scc +// GFX11: encoding: [0x05,0x00,0x1b,0xd6,0xc1,0xfe,0xf4,0x03] + +v_min3_u32 v5, 0.5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x1b,0xd6,0xf0,0xfa,0xc0,0x03] + +v_min3_u32 v5, src_scc, vcc_lo, -1 +// GFX11: encoding: 
[0x05,0x00,0x1b,0xd6,0xfd,0xd4,0x04,0x03] + +v_min3_u32 v255, 0xaf123456, vcc_hi, null +// GFX11: encoding: [0xff,0x00,0x1b,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] + +v_min_f64 v[5:6], v[1:2], v[2:3] +// GFX11: encoding: [0x05,0x00,0x29,0xd7,0x01,0x05,0x02,0x00] + +v_min_f64 v[5:6], v[254:255], v[254:255] +// GFX11: encoding: [0x05,0x00,0x29,0xd7,0xfe,0xfd,0x03,0x00] + +v_min_f64 v[5:6], s[2:3], s[4:5] +// GFX11: encoding: [0x05,0x00,0x29,0xd7,0x02,0x08,0x00,0x00] + +v_min_f64 v[5:6], s[104:105], s[104:105] +// GFX11: encoding: [0x05,0x00,0x29,0xd7,0x68,0xd0,0x00,0x00] + +v_min_f64 v[5:6], vcc, ttmp[14:15] +// GFX11: encoding: [0x05,0x00,0x29,0xd7,0x6a,0xf4,0x00,0x00] + +v_min_f64 v[5:6], ttmp[14:15], 0xaf123456 +// GFX11: encoding: [0x05,0x00,0x29,0xd7,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] + +v_min_f64 v[5:6], -|exec|, src_scc +// GFX11: encoding: [0x05,0x01,0x29,0xd7,0x7e,0xfa,0x01,0x20] + +v_min_f64 v[5:6], null, 0.5 +// GFX11: encoding: [0x05,0x00,0x29,0xd7,0x7c,0xe0,0x01,0x00] + +v_min_f64 v[5:6], -1, -1 +// GFX11: encoding: [0x05,0x00,0x29,0xd7,0xc1,0x82,0x01,0x00] + +v_min_f64 v[5:6], 0.5, null mul:2 +// GFX11: encoding: [0x05,0x00,0x29,0xd7,0xf0,0xf8,0x00,0x08] + +v_min_f64 v[5:6], -|src_scc|, -|exec| mul:4 +// GFX11: encoding: [0x05,0x03,0x29,0xd7,0xfd,0xfc,0x00,0x70] + +v_min_f64 v[254:255], 0xaf123456, -|vcc| clamp div:2 +// GFX11: encoding: [0xfe,0x82,0x29,0xd7,0xff,0xd4,0x00,0x58,0x56,0x34,0x12,0xaf] + +v_min_i16 v5, v1, v2 +// GFX11: encoding: [0x05,0x00,0x0c,0xd7,0x01,0x05,0x02,0x00] + +v_min_i16 v5, v255, v255 +// GFX11: encoding: [0x05,0x00,0x0c,0xd7,0xff,0xff,0x03,0x00] + +v_min_i16 v5, s1, s2 +// GFX11: encoding: [0x05,0x00,0x0c,0xd7,0x01,0x04,0x00,0x00] + +v_min_i16 v5, s105, s105 +// GFX11: encoding: [0x05,0x00,0x0c,0xd7,0x69,0xd2,0x00,0x00] + +v_min_i16 v5, vcc_lo, ttmp15 +// GFX11: encoding: [0x05,0x00,0x0c,0xd7,0x6a,0xf6,0x00,0x00] + +v_min_i16 v5, vcc_hi, 0xfe0b +// GFX11: encoding: [0x05,0x00,0x0c,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] + +v_min_i16 v5, ttmp15, src_scc +// GFX11: encoding: [0x05,0x00,0x0c,0xd7,0x7b,0xfa,0x01,0x00] + +v_min_i16 v5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x0c,0xd7,0x7d,0xe0,0x01,0x00] + +v_min_i16 v5, exec_lo, -1 +// GFX11: encoding: [0x05,0x00,0x0c,0xd7,0x7e,0x82,0x01,0x00] + +v_min_i16 v5, exec_hi, null +// GFX11: encoding: [0x05,0x00,0x0c,0xd7,0x7f,0xf8,0x00,0x00] + +v_min_i16 v5, null, exec_lo +// GFX11: encoding: [0x05,0x00,0x0c,0xd7,0x7c,0xfc,0x00,0x00] + +v_min_i16 v5, -1, exec_hi +// GFX11: encoding: [0x05,0x00,0x0c,0xd7,0xc1,0xfe,0x00,0x00] + +v_min_i16 v5, 0.5, m0 +// GFX11: encoding: [0x05,0x00,0x0c,0xd7,0xf0,0xfa,0x00,0x00] + +v_min_i16 v5, src_scc, vcc_lo +// GFX11: encoding: [0x05,0x00,0x0c,0xd7,0xfd,0xd4,0x00,0x00] + +v_min_i16 v255, 0xfe0b, vcc_hi +// GFX11: encoding: [0xff,0x00,0x0c,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] + +v_min_u16 v5, v1, v2 +// GFX11: encoding: [0x05,0x00,0x0b,0xd7,0x01,0x05,0x02,0x00] + +v_min_u16 v5, v255, v255 +// GFX11: encoding: [0x05,0x00,0x0b,0xd7,0xff,0xff,0x03,0x00] + +v_min_u16 v5, s1, s2 +// GFX11: encoding: [0x05,0x00,0x0b,0xd7,0x01,0x04,0x00,0x00] + +v_min_u16 v5, s105, s105 +// GFX11: encoding: [0x05,0x00,0x0b,0xd7,0x69,0xd2,0x00,0x00] + +v_min_u16 v5, vcc_lo, ttmp15 +// GFX11: encoding: [0x05,0x00,0x0b,0xd7,0x6a,0xf6,0x00,0x00] + +v_min_u16 v5, vcc_hi, 0xfe0b +// GFX11: encoding: [0x05,0x00,0x0b,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] + +v_min_u16 v5, ttmp15, src_scc +// GFX11: encoding: [0x05,0x00,0x0b,0xd7,0x7b,0xfa,0x01,0x00] + +v_min_u16 v5, m0, 
0.5 +// GFX11: encoding: [0x05,0x00,0x0b,0xd7,0x7d,0xe0,0x01,0x00] + +v_min_u16 v5, exec_lo, -1 +// GFX11: encoding: [0x05,0x00,0x0b,0xd7,0x7e,0x82,0x01,0x00] + +v_min_u16 v5, exec_hi, null +// GFX11: encoding: [0x05,0x00,0x0b,0xd7,0x7f,0xf8,0x00,0x00] + +v_min_u16 v5, null, exec_lo +// GFX11: encoding: [0x05,0x00,0x0b,0xd7,0x7c,0xfc,0x00,0x00] + +v_min_u16 v5, -1, exec_hi +// GFX11: encoding: [0x05,0x00,0x0b,0xd7,0xc1,0xfe,0x00,0x00] + +v_min_u16 v5, 0.5, m0 +// GFX11: encoding: [0x05,0x00,0x0b,0xd7,0xf0,0xfa,0x00,0x00] + +v_min_u16 v5, src_scc, vcc_lo +// GFX11: encoding: [0x05,0x00,0x0b,0xd7,0xfd,0xd4,0x00,0x00] + +v_min_u16 v255, 0xfe0b, vcc_hi +// GFX11: encoding: [0xff,0x00,0x0b,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] + +v_minmax_f16 v5, v1, v2, s3 +// GFX11: encoding: [0x05,0x00,0x61,0xd6,0x01,0x05,0x0e,0x00] + +v_minmax_f16 v5, v255, s2, s105 +// GFX11: encoding: [0x05,0x00,0x61,0xd6,0xff,0x05,0xa4,0x01] + +v_minmax_f16 v5, s1, v255, exec_hi +// GFX11: encoding: [0x05,0x00,0x61,0xd6,0x01,0xfe,0xff,0x01] + +v_minmax_f16 v5, s105, s105, exec_lo +// GFX11: encoding: [0x05,0x00,0x61,0xd6,0x69,0xd2,0xf8,0x01] + +v_minmax_f16 v5, vcc_lo, ttmp15, v3 +// GFX11: encoding: [0x05,0x00,0x61,0xd6,0x6a,0xf6,0x0c,0x04] + +v_minmax_f16 v5, vcc_hi, 0xfe0b, v255 +// GFX11: encoding: [0x05,0x00,0x61,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] + +v_minmax_f16 v5, -|ttmp15|, -|src_scc|, -|ttmp15| +// GFX11: encoding: [0x05,0x07,0x61,0xd6,0x7b,0xfa,0xed,0xe1] + +v_minmax_f16 v5, m0, 0.5, m0 +// GFX11: encoding: [0x05,0x00,0x61,0xd6,0x7d,0xe0,0xf5,0x01] + +v_minmax_f16 v5, |exec_lo|, -1, vcc_hi +// GFX11: encoding: [0x05,0x01,0x61,0xd6,0x7e,0x82,0xad,0x01] + +v_minmax_f16 v5, -|exec_hi|, null, -|vcc_lo| +// GFX11: encoding: [0x05,0x05,0x61,0xd6,0x7f,0xf8,0xa8,0xa1] + +v_minmax_f16 v5, null, exec_lo, -|0xfe0b| +// GFX11: encoding: [0x05,0x04,0x61,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00] + +v_minmax_f16 v5, -1, -|exec_hi|, -|src_scc| +// GFX11: encoding: [0x05,0x06,0x61,0xd6,0xc1,0xfe,0xf4,0xc3] + +v_minmax_f16 v5, 0.5, -m0, 0.5 mul:2 +// GFX11: encoding: [0x05,0x00,0x61,0xd6,0xf0,0xfa,0xc0,0x4b] + +v_minmax_f16 v5, -src_scc, |vcc_lo|, -1 mul:4 +// GFX11: encoding: [0x05,0x02,0x61,0xd6,0xfd,0xd4,0x04,0x33] + +v_minmax_f16 v255, -|0xfe0b|, -|vcc_hi|, null clamp div:2 +// GFX11: encoding: [0xff,0x83,0x61,0xd6,0xff,0xd6,0xf0,0x79,0x0b,0xfe,0x00,0x00] + +v_minmax_f32 v5, v1, v2, s3 +// GFX11: encoding: [0x05,0x00,0x5f,0xd6,0x01,0x05,0x0e,0x00] + +v_minmax_f32 v5, v255, s2, s105 +// GFX11: encoding: [0x05,0x00,0x5f,0xd6,0xff,0x05,0xa4,0x01] + +v_minmax_f32 v5, s1, v255, exec_hi +// GFX11: encoding: [0x05,0x00,0x5f,0xd6,0x01,0xfe,0xff,0x01] + +v_minmax_f32 v5, s105, s105, exec_lo +// GFX11: encoding: [0x05,0x00,0x5f,0xd6,0x69,0xd2,0xf8,0x01] + +v_minmax_f32 v5, vcc_lo, ttmp15, v3 +// GFX11: encoding: [0x05,0x00,0x5f,0xd6,0x6a,0xf6,0x0c,0x04] + +v_minmax_f32 v5, vcc_hi, 0xaf123456, v255 +// GFX11: encoding: [0x05,0x00,0x5f,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] + +v_minmax_f32 v5, -|ttmp15|, -|src_scc|, -|ttmp15| +// GFX11: encoding: [0x05,0x07,0x5f,0xd6,0x7b,0xfa,0xed,0xe1] + +v_minmax_f32 v5, m0, 0.5, m0 +// GFX11: encoding: [0x05,0x00,0x5f,0xd6,0x7d,0xe0,0xf5,0x01] + +v_minmax_f32 v5, |exec_lo|, -1, vcc_hi +// GFX11: encoding: [0x05,0x01,0x5f,0xd6,0x7e,0x82,0xad,0x01] + +v_minmax_f32 v5, -|exec_hi|, null, -|vcc_lo| +// GFX11: encoding: [0x05,0x05,0x5f,0xd6,0x7f,0xf8,0xa8,0xa1] + +v_minmax_f32 v5, null, exec_lo, -|0xaf123456| +// GFX11: encoding: 
[0x05,0x04,0x5f,0xd6,0x7c,0xfc,0xfc,0x83,0x56,0x34,0x12,0xaf] + +v_minmax_f32 v5, -1, -|exec_hi|, -|src_scc| +// GFX11: encoding: [0x05,0x06,0x5f,0xd6,0xc1,0xfe,0xf4,0xc3] + +v_minmax_f32 v5, 0.5, -m0, 0.5 mul:2 +// GFX11: encoding: [0x05,0x00,0x5f,0xd6,0xf0,0xfa,0xc0,0x4b] + +v_minmax_f32 v5, -src_scc, |vcc_lo|, -1 mul:4 +// GFX11: encoding: [0x05,0x02,0x5f,0xd6,0xfd,0xd4,0x04,0x33] + +v_minmax_f32 v255, -|0xaf123456|, -|vcc_hi|, null clamp div:2 +// GFX11: encoding: [0xff,0x83,0x5f,0xd6,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf] + +v_minmax_i32 v5, v1, v2, s3 +// GFX11: encoding: [0x05,0x00,0x65,0xd6,0x01,0x05,0x0e,0x00] + +v_minmax_i32 v5, v255, s2, s105 +// GFX11: encoding: [0x05,0x00,0x65,0xd6,0xff,0x05,0xa4,0x01] + +v_minmax_i32 v5, s1, v255, exec_hi +// GFX11: encoding: [0x05,0x00,0x65,0xd6,0x01,0xfe,0xff,0x01] + +v_minmax_i32 v5, s105, s105, exec_lo +// GFX11: encoding: [0x05,0x00,0x65,0xd6,0x69,0xd2,0xf8,0x01] + +v_minmax_i32 v5, vcc_lo, ttmp15, v3 +// GFX11: encoding: [0x05,0x00,0x65,0xd6,0x6a,0xf6,0x0c,0x04] + +v_minmax_i32 v5, vcc_hi, 0xaf123456, v255 +// GFX11: encoding: [0x05,0x00,0x65,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] + +v_minmax_i32 v5, ttmp15, src_scc, ttmp15 +// GFX11: encoding: [0x05,0x00,0x65,0xd6,0x7b,0xfa,0xed,0x01] + +v_minmax_i32 v5, m0, 0.5, m0 +// GFX11: encoding: [0x05,0x00,0x65,0xd6,0x7d,0xe0,0xf5,0x01] + +v_minmax_i32 v5, exec_lo, -1, vcc_hi +// GFX11: encoding: [0x05,0x00,0x65,0xd6,0x7e,0x82,0xad,0x01] + +v_minmax_i32 v5, exec_hi, null, vcc_lo +// GFX11: encoding: [0x05,0x00,0x65,0xd6,0x7f,0xf8,0xa8,0x01] + +v_minmax_i32 v5, null, exec_lo, 0xaf123456 +// GFX11: encoding: [0x05,0x00,0x65,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf] + +v_minmax_i32 v5, -1, exec_hi, src_scc +// GFX11: encoding: [0x05,0x00,0x65,0xd6,0xc1,0xfe,0xf4,0x03] + +v_minmax_i32 v5, 0.5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x65,0xd6,0xf0,0xfa,0xc0,0x03] + +v_minmax_i32 v5, src_scc, vcc_lo, -1 +// GFX11: encoding: [0x05,0x00,0x65,0xd6,0xfd,0xd4,0x04,0x03] + +v_minmax_i32 v255, 0xaf123456, vcc_hi, null +// GFX11: encoding: [0xff,0x00,0x65,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] + +v_minmax_u32 v5, v1, v2, s3 +// GFX11: encoding: [0x05,0x00,0x63,0xd6,0x01,0x05,0x0e,0x00] + +v_minmax_u32 v5, v255, s2, s105 +// GFX11: encoding: [0x05,0x00,0x63,0xd6,0xff,0x05,0xa4,0x01] + +v_minmax_u32 v5, s1, v255, exec_hi +// GFX11: encoding: [0x05,0x00,0x63,0xd6,0x01,0xfe,0xff,0x01] + +v_minmax_u32 v5, s105, s105, exec_lo +// GFX11: encoding: [0x05,0x00,0x63,0xd6,0x69,0xd2,0xf8,0x01] + +v_minmax_u32 v5, vcc_lo, ttmp15, v3 +// GFX11: encoding: [0x05,0x00,0x63,0xd6,0x6a,0xf6,0x0c,0x04] + +v_minmax_u32 v5, vcc_hi, 0xaf123456, v255 +// GFX11: encoding: [0x05,0x00,0x63,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] + +v_minmax_u32 v5, ttmp15, src_scc, ttmp15 +// GFX11: encoding: [0x05,0x00,0x63,0xd6,0x7b,0xfa,0xed,0x01] + +v_minmax_u32 v5, m0, 0.5, m0 +// GFX11: encoding: [0x05,0x00,0x63,0xd6,0x7d,0xe0,0xf5,0x01] + +v_minmax_u32 v5, exec_lo, -1, vcc_hi +// GFX11: encoding: [0x05,0x00,0x63,0xd6,0x7e,0x82,0xad,0x01] + +v_minmax_u32 v5, exec_hi, null, vcc_lo +// GFX11: encoding: [0x05,0x00,0x63,0xd6,0x7f,0xf8,0xa8,0x01] + +v_minmax_u32 v5, null, exec_lo, 0xaf123456 +// GFX11: encoding: [0x05,0x00,0x63,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf] + +v_minmax_u32 v5, -1, exec_hi, src_scc +// GFX11: encoding: [0x05,0x00,0x63,0xd6,0xc1,0xfe,0xf4,0x03] + +v_minmax_u32 v5, 0.5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x63,0xd6,0xf0,0xfa,0xc0,0x03] + +v_minmax_u32 v5, src_scc, vcc_lo, -1 +// GFX11: 
encoding: [0x05,0x00,0x63,0xd6,0xfd,0xd4,0x04,0x03] + +v_minmax_u32 v255, 0xaf123456, vcc_hi, null +// GFX11: encoding: [0xff,0x00,0x63,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] + +v_mqsad_pk_u16_u8 v[5:6], v[1:2], v2, ttmp[14:15] +// GFX11: encoding: [0x05,0x00,0x3b,0xd6,0x01,0x05,0xea,0x01] + +v_mqsad_pk_u16_u8 v[5:6], v[1:2], v255, ttmp[14:15] +// GFX11: encoding: [0x05,0x00,0x3b,0xd6,0x01,0xff,0xeb,0x01] + +v_mqsad_pk_u16_u8 v[5:6], v[1:2], s2, ttmp[14:15] +// GFX11: encoding: [0x05,0x00,0x3b,0xd6,0x01,0x05,0xe8,0x01] + +v_mqsad_pk_u16_u8 v[5:6], v[1:2], s105, ttmp[14:15] +// GFX11: encoding: [0x05,0x00,0x3b,0xd6,0x01,0xd3,0xe8,0x01] + +v_mqsad_pk_u16_u8 v[5:6], v[254:255], ttmp15, s[6:7] +// GFX11: encoding: [0x05,0x00,0x3b,0xd6,0xfe,0xf7,0x18,0x00] + +v_mqsad_pk_u16_u8 v[5:6], s[2:3], vcc_hi, v[3:4] +// GFX11: encoding: [0x05,0x00,0x3b,0xd6,0x02,0xd6,0x0c,0x04] + +v_mqsad_pk_u16_u8 v[5:6], s[104:105], vcc_lo, s[104:105] +// GFX11: encoding: [0x05,0x00,0x3b,0xd6,0x68,0xd4,0xa0,0x01] + +v_mqsad_pk_u16_u8 v[5:6], vcc, m0, v[254:255] +// GFX11: encoding: [0x05,0x00,0x3b,0xd6,0x6a,0xfa,0xf8,0x07] + +v_mqsad_pk_u16_u8 v[5:6], ttmp[14:15], exec_hi, null +// GFX11: encoding: [0x05,0x00,0x3b,0xd6,0x7a,0xfe,0xf0,0x01] + +v_mqsad_pk_u16_u8 v[5:6], exec, exec_lo, exec +// GFX11: encoding: [0x05,0x00,0x3b,0xd6,0x7e,0xfc,0xf8,0x01] + +v_mqsad_pk_u16_u8 v[5:6], null, null, vcc +// GFX11: encoding: [0x05,0x00,0x3b,0xd6,0x7c,0xf8,0xa8,0x01] + +v_mqsad_pk_u16_u8 v[5:6], -1, -1, 0xaf123456 +// GFX11: encoding: [0x05,0x00,0x3b,0xd6,0xc1,0x82,0xfd,0x03,0x56,0x34,0x12,0xaf] + +v_mqsad_pk_u16_u8 v[5:6], 0.5, 0.5, src_scc +// GFX11: encoding: [0x05,0x00,0x3b,0xd6,0xf0,0xe0,0xf5,0x03] + +v_mqsad_pk_u16_u8 v[5:6], src_scc, src_scc, 0.5 +// GFX11: encoding: [0x05,0x00,0x3b,0xd6,0xfd,0xfa,0xc1,0x03] + +v_mqsad_pk_u16_u8 v[254:255], 0xaf123456, 0xaf123456, -1 clamp +// GFX11: encoding: [0xfe,0x80,0x3b,0xd6,0xff,0xfe,0x05,0x03,0x56,0x34,0x12,0xaf] + +v_mqsad_u32_u8 v[5:8], v[1:2], v2, v[252:255] +// GFX11: encoding: [0x05,0x00,0x3d,0xd6,0x01,0x05,0xf2,0x07] + +v_mqsad_u32_u8 v[5:8], v[1:2], v255, v[252:255] +// GFX11: encoding: [0x05,0x00,0x3d,0xd6,0x01,0xff,0xf3,0x07] + +v_mqsad_u32_u8 v[5:8], v[1:2], s2, v[252:255] +// GFX11: encoding: [0x05,0x00,0x3d,0xd6,0x01,0x05,0xf0,0x07] + +v_mqsad_u32_u8 v[5:8], v[1:2], s105, v[252:255] +// GFX11: encoding: [0x05,0x00,0x3d,0xd6,0x01,0xd3,0xf0,0x07] + +v_mqsad_u32_u8 v[5:8], v[254:255], ttmp15, v[252:255] +// GFX11: encoding: [0x05,0x00,0x3d,0xd6,0xfe,0xf7,0xf0,0x07] + +v_mqsad_u32_u8 v[5:8], s[2:3], vcc_hi, v[252:255] +// GFX11: encoding: [0x05,0x00,0x3d,0xd6,0x02,0xd6,0xf0,0x07] + +v_mqsad_u32_u8 v[5:8], s[104:105], vcc_lo, v[252:255] +// GFX11: encoding: [0x05,0x00,0x3d,0xd6,0x68,0xd4,0xf0,0x07] + +v_mqsad_u32_u8 v[5:8], vcc, m0, v[252:255] +// GFX11: encoding: [0x05,0x00,0x3d,0xd6,0x6a,0xfa,0xf0,0x07] + +v_mqsad_u32_u8 v[5:8], ttmp[14:15], exec_hi, v[252:255] +// GFX11: encoding: [0x05,0x00,0x3d,0xd6,0x7a,0xfe,0xf0,0x07] + +v_mqsad_u32_u8 v[5:8], exec, exec_lo, v[252:255] +// GFX11: encoding: [0x05,0x00,0x3d,0xd6,0x7e,0xfc,0xf0,0x07] + +v_mqsad_u32_u8 v[5:8], null, null, v[252:255] +// GFX11: encoding: [0x05,0x00,0x3d,0xd6,0x7c,0xf8,0xf0,0x07] + +v_mqsad_u32_u8 v[5:8], -1, -1, v[252:255] +// GFX11: encoding: [0x05,0x00,0x3d,0xd6,0xc1,0x82,0xf1,0x07] + +v_mqsad_u32_u8 v[5:8], 0.5, 0.5, v[252:255] +// GFX11: encoding: [0x05,0x00,0x3d,0xd6,0xf0,0xe0,0xf1,0x07] + +v_mqsad_u32_u8 v[5:8], src_scc, src_scc, v[252:255] +// GFX11: encoding: 
[0x05,0x00,0x3d,0xd6,0xfd,0xfa,0xf1,0x07] + +v_mqsad_u32_u8 v[252:255], 0xaf123456, 0xaf123456, v[3:6] clamp +// GFX11: encoding: [0xfc,0x80,0x3d,0xd6,0xff,0xfe,0x0d,0x04,0x56,0x34,0x12,0xaf] + +v_msad_u8 v5, v1, v2, s3 +// GFX11: encoding: [0x05,0x00,0x39,0xd6,0x01,0x05,0x0e,0x00] + +v_msad_u8 v5, v255, s2, s105 +// GFX11: encoding: [0x05,0x00,0x39,0xd6,0xff,0x05,0xa4,0x01] + +v_msad_u8 v5, s1, v255, exec_hi +// GFX11: encoding: [0x05,0x00,0x39,0xd6,0x01,0xfe,0xff,0x01] + +v_msad_u8 v5, s105, s105, exec_lo +// GFX11: encoding: [0x05,0x00,0x39,0xd6,0x69,0xd2,0xf8,0x01] + +v_msad_u8 v5, vcc_lo, ttmp15, v3 +// GFX11: encoding: [0x05,0x00,0x39,0xd6,0x6a,0xf6,0x0c,0x04] + +v_msad_u8 v5, vcc_hi, 0xaf123456, v255 +// GFX11: encoding: [0x05,0x00,0x39,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] + +v_msad_u8 v5, ttmp15, src_scc, ttmp15 +// GFX11: encoding: [0x05,0x00,0x39,0xd6,0x7b,0xfa,0xed,0x01] + +v_msad_u8 v5, m0, 0.5, m0 +// GFX11: encoding: [0x05,0x00,0x39,0xd6,0x7d,0xe0,0xf5,0x01] + +v_msad_u8 v5, exec_lo, -1, vcc_hi +// GFX11: encoding: [0x05,0x00,0x39,0xd6,0x7e,0x82,0xad,0x01] + +v_msad_u8 v5, exec_hi, null, vcc_lo +// GFX11: encoding: [0x05,0x00,0x39,0xd6,0x7f,0xf8,0xa8,0x01] + +v_msad_u8 v5, null, exec_lo, 0xaf123456 +// GFX11: encoding: [0x05,0x00,0x39,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf] + +v_msad_u8 v5, -1, exec_hi, src_scc +// GFX11: encoding: [0x05,0x00,0x39,0xd6,0xc1,0xfe,0xf4,0x03] + +v_msad_u8 v5, 0.5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x39,0xd6,0xf0,0xfa,0xc0,0x03] + +v_msad_u8 v5, src_scc, vcc_lo, -1 +// GFX11: encoding: [0x05,0x00,0x39,0xd6,0xfd,0xd4,0x04,0x03] + +v_msad_u8 v255, 0xaf123456, vcc_hi, null clamp +// GFX11: encoding: [0xff,0x80,0x39,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] + +v_mul_f64 v[5:6], v[1:2], v[2:3] +// GFX11: encoding: [0x05,0x00,0x28,0xd7,0x01,0x05,0x02,0x00] + +v_mul_f64 v[5:6], v[254:255], v[254:255] +// GFX11: encoding: [0x05,0x00,0x28,0xd7,0xfe,0xfd,0x03,0x00] + +v_mul_f64 v[5:6], s[2:3], s[4:5] +// GFX11: encoding: [0x05,0x00,0x28,0xd7,0x02,0x08,0x00,0x00] + +v_mul_f64 v[5:6], s[104:105], s[104:105] +// GFX11: encoding: [0x05,0x00,0x28,0xd7,0x68,0xd0,0x00,0x00] + +v_mul_f64 v[5:6], vcc, ttmp[14:15] +// GFX11: encoding: [0x05,0x00,0x28,0xd7,0x6a,0xf4,0x00,0x00] + +v_mul_f64 v[5:6], ttmp[14:15], 0xaf123456 +// GFX11: encoding: [0x05,0x00,0x28,0xd7,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] + +v_mul_f64 v[5:6], -|exec|, src_scc +// GFX11: encoding: [0x05,0x01,0x28,0xd7,0x7e,0xfa,0x01,0x20] + +v_mul_f64 v[5:6], null, 0.5 +// GFX11: encoding: [0x05,0x00,0x28,0xd7,0x7c,0xe0,0x01,0x00] + +v_mul_f64 v[5:6], -1, -1 +// GFX11: encoding: [0x05,0x00,0x28,0xd7,0xc1,0x82,0x01,0x00] + +v_mul_f64 v[5:6], 0.5, null mul:2 +// GFX11: encoding: [0x05,0x00,0x28,0xd7,0xf0,0xf8,0x00,0x08] + +v_mul_f64 v[5:6], -|src_scc|, -|exec| mul:4 +// GFX11: encoding: [0x05,0x03,0x28,0xd7,0xfd,0xfc,0x00,0x70] + +v_mul_f64 v[254:255], 0xaf123456, -|vcc| clamp div:2 +// GFX11: encoding: [0xfe,0x82,0x28,0xd7,0xff,0xd4,0x00,0x58,0x56,0x34,0x12,0xaf] + +v_mul_hi_i32 v5, v1, v2 +// GFX11: encoding: [0x05,0x00,0x2e,0xd7,0x01,0x05,0x02,0x00] + +v_mul_hi_i32 v5, v255, v255 +// GFX11: encoding: [0x05,0x00,0x2e,0xd7,0xff,0xff,0x03,0x00] + +v_mul_hi_i32 v5, s1, s2 +// GFX11: encoding: [0x05,0x00,0x2e,0xd7,0x01,0x04,0x00,0x00] + +v_mul_hi_i32 v5, s105, s105 +// GFX11: encoding: [0x05,0x00,0x2e,0xd7,0x69,0xd2,0x00,0x00] + +v_mul_hi_i32 v5, vcc_lo, ttmp15 +// GFX11: encoding: [0x05,0x00,0x2e,0xd7,0x6a,0xf6,0x00,0x00] + +v_mul_hi_i32 v5, vcc_hi, 0xaf123456 +// GFX11: encoding: 
[0x05,0x00,0x2e,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] + +v_mul_hi_i32 v5, ttmp15, src_scc +// GFX11: encoding: [0x05,0x00,0x2e,0xd7,0x7b,0xfa,0x01,0x00] + +v_mul_hi_i32 v5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x2e,0xd7,0x7d,0xe0,0x01,0x00] + +v_mul_hi_i32 v5, exec_lo, -1 +// GFX11: encoding: [0x05,0x00,0x2e,0xd7,0x7e,0x82,0x01,0x00] + +v_mul_hi_i32 v5, exec_hi, null +// GFX11: encoding: [0x05,0x00,0x2e,0xd7,0x7f,0xf8,0x00,0x00] + +v_mul_hi_i32 v5, null, exec_lo +// GFX11: encoding: [0x05,0x00,0x2e,0xd7,0x7c,0xfc,0x00,0x00] + +v_mul_hi_i32 v5, -1, exec_hi +// GFX11: encoding: [0x05,0x00,0x2e,0xd7,0xc1,0xfe,0x00,0x00] + +v_mul_hi_i32 v5, 0.5, m0 +// GFX11: encoding: [0x05,0x00,0x2e,0xd7,0xf0,0xfa,0x00,0x00] + +v_mul_hi_i32 v5, src_scc, vcc_lo +// GFX11: encoding: [0x05,0x00,0x2e,0xd7,0xfd,0xd4,0x00,0x00] + +v_mul_hi_i32 v255, 0xaf123456, vcc_hi +// GFX11: encoding: [0xff,0x00,0x2e,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] + +v_mul_hi_u32 v5, v1, v2 +// GFX11: encoding: [0x05,0x00,0x2d,0xd7,0x01,0x05,0x02,0x00] + +v_mul_hi_u32 v5, v255, v255 +// GFX11: encoding: [0x05,0x00,0x2d,0xd7,0xff,0xff,0x03,0x00] + +v_mul_hi_u32 v5, s1, s2 +// GFX11: encoding: [0x05,0x00,0x2d,0xd7,0x01,0x04,0x00,0x00] + +v_mul_hi_u32 v5, s105, s105 +// GFX11: encoding: [0x05,0x00,0x2d,0xd7,0x69,0xd2,0x00,0x00] + +v_mul_hi_u32 v5, vcc_lo, ttmp15 +// GFX11: encoding: [0x05,0x00,0x2d,0xd7,0x6a,0xf6,0x00,0x00] + +v_mul_hi_u32 v5, vcc_hi, 0xaf123456 +// GFX11: encoding: [0x05,0x00,0x2d,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] + +v_mul_hi_u32 v5, ttmp15, src_scc +// GFX11: encoding: [0x05,0x00,0x2d,0xd7,0x7b,0xfa,0x01,0x00] + +v_mul_hi_u32 v5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x2d,0xd7,0x7d,0xe0,0x01,0x00] + +v_mul_hi_u32 v5, exec_lo, -1 +// GFX11: encoding: [0x05,0x00,0x2d,0xd7,0x7e,0x82,0x01,0x00] + +v_mul_hi_u32 v5, exec_hi, null +// GFX11: encoding: [0x05,0x00,0x2d,0xd7,0x7f,0xf8,0x00,0x00] + +v_mul_hi_u32 v5, null, exec_lo +// GFX11: encoding: [0x05,0x00,0x2d,0xd7,0x7c,0xfc,0x00,0x00] + +v_mul_hi_u32 v5, -1, exec_hi +// GFX11: encoding: [0x05,0x00,0x2d,0xd7,0xc1,0xfe,0x00,0x00] + +v_mul_hi_u32 v5, 0.5, m0 +// GFX11: encoding: [0x05,0x00,0x2d,0xd7,0xf0,0xfa,0x00,0x00] + +v_mul_hi_u32 v5, src_scc, vcc_lo +// GFX11: encoding: [0x05,0x00,0x2d,0xd7,0xfd,0xd4,0x00,0x00] + +v_mul_hi_u32 v255, 0xaf123456, vcc_hi +// GFX11: encoding: [0xff,0x00,0x2d,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] + +v_mul_lo_u16 v5, v1, v2 +// GFX11: encoding: [0x05,0x00,0x05,0xd7,0x01,0x05,0x02,0x00] + +v_mul_lo_u16 v5, v255, v255 +// GFX11: encoding: [0x05,0x00,0x05,0xd7,0xff,0xff,0x03,0x00] + +v_mul_lo_u16 v5, s1, s2 +// GFX11: encoding: [0x05,0x00,0x05,0xd7,0x01,0x04,0x00,0x00] + +v_mul_lo_u16 v5, s105, s105 +// GFX11: encoding: [0x05,0x00,0x05,0xd7,0x69,0xd2,0x00,0x00] + +v_mul_lo_u16 v5, vcc_lo, ttmp15 +// GFX11: encoding: [0x05,0x00,0x05,0xd7,0x6a,0xf6,0x00,0x00] + +v_mul_lo_u16 v5, vcc_hi, 0xfe0b +// GFX11: encoding: [0x05,0x00,0x05,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] + +v_mul_lo_u16 v5, ttmp15, src_scc +// GFX11: encoding: [0x05,0x00,0x05,0xd7,0x7b,0xfa,0x01,0x00] + +v_mul_lo_u16 v5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x05,0xd7,0x7d,0xe0,0x01,0x00] + +v_mul_lo_u16 v5, exec_lo, -1 +// GFX11: encoding: [0x05,0x00,0x05,0xd7,0x7e,0x82,0x01,0x00] + +v_mul_lo_u16 v5, exec_hi, null +// GFX11: encoding: [0x05,0x00,0x05,0xd7,0x7f,0xf8,0x00,0x00] + +v_mul_lo_u16 v5, null, exec_lo +// GFX11: encoding: [0x05,0x00,0x05,0xd7,0x7c,0xfc,0x00,0x00] + +v_mul_lo_u16 v5, -1, exec_hi +// GFX11: encoding: 
[0x05,0x00,0x05,0xd7,0xc1,0xfe,0x00,0x00] + +v_mul_lo_u16 v5, 0.5, m0 +// GFX11: encoding: [0x05,0x00,0x05,0xd7,0xf0,0xfa,0x00,0x00] + +v_mul_lo_u16 v5, src_scc, vcc_lo +// GFX11: encoding: [0x05,0x00,0x05,0xd7,0xfd,0xd4,0x00,0x00] + +v_mul_lo_u16 v255, 0xfe0b, vcc_hi +// GFX11: encoding: [0xff,0x00,0x05,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] + +v_mul_lo_u32 v5, v1, v2 +// GFX11: encoding: [0x05,0x00,0x2c,0xd7,0x01,0x05,0x02,0x00] + +v_mul_lo_u32 v5, v255, v255 +// GFX11: encoding: [0x05,0x00,0x2c,0xd7,0xff,0xff,0x03,0x00] + +v_mul_lo_u32 v5, s1, s2 +// GFX11: encoding: [0x05,0x00,0x2c,0xd7,0x01,0x04,0x00,0x00] + +v_mul_lo_u32 v5, s105, s105 +// GFX11: encoding: [0x05,0x00,0x2c,0xd7,0x69,0xd2,0x00,0x00] + +v_mul_lo_u32 v5, vcc_lo, ttmp15 +// GFX11: encoding: [0x05,0x00,0x2c,0xd7,0x6a,0xf6,0x00,0x00] + +v_mul_lo_u32 v5, vcc_hi, 0xaf123456 +// GFX11: encoding: [0x05,0x00,0x2c,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] + +v_mul_lo_u32 v5, ttmp15, src_scc +// GFX11: encoding: [0x05,0x00,0x2c,0xd7,0x7b,0xfa,0x01,0x00] + +v_mul_lo_u32 v5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x2c,0xd7,0x7d,0xe0,0x01,0x00] + +v_mul_lo_u32 v5, exec_lo, -1 +// GFX11: encoding: [0x05,0x00,0x2c,0xd7,0x7e,0x82,0x01,0x00] + +v_mul_lo_u32 v5, exec_hi, null +// GFX11: encoding: [0x05,0x00,0x2c,0xd7,0x7f,0xf8,0x00,0x00] + +v_mul_lo_u32 v5, null, exec_lo +// GFX11: encoding: [0x05,0x00,0x2c,0xd7,0x7c,0xfc,0x00,0x00] + +v_mul_lo_u32 v5, -1, exec_hi +// GFX11: encoding: [0x05,0x00,0x2c,0xd7,0xc1,0xfe,0x00,0x00] + +v_mul_lo_u32 v5, 0.5, m0 +// GFX11: encoding: [0x05,0x00,0x2c,0xd7,0xf0,0xfa,0x00,0x00] + +v_mul_lo_u32 v5, src_scc, vcc_lo +// GFX11: encoding: [0x05,0x00,0x2c,0xd7,0xfd,0xd4,0x00,0x00] + +v_mul_lo_u32 v255, 0xaf123456, vcc_hi +// GFX11: encoding: [0xff,0x00,0x2c,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] + +v_mullit_f32 v5, v1, v2, s3 +// GFX11: encoding: [0x05,0x00,0x18,0xd6,0x01,0x05,0x0e,0x00] + +v_mullit_f32 v5, v255, s2, s105 +// GFX11: encoding: [0x05,0x00,0x18,0xd6,0xff,0x05,0xa4,0x01] + +v_mullit_f32 v5, s1, v255, exec_hi +// GFX11: encoding: [0x05,0x00,0x18,0xd6,0x01,0xfe,0xff,0x01] + +v_mullit_f32 v5, s105, s105, exec_lo +// GFX11: encoding: [0x05,0x00,0x18,0xd6,0x69,0xd2,0xf8,0x01] + +v_mullit_f32 v5, vcc_lo, ttmp15, v3 +// GFX11: encoding: [0x05,0x00,0x18,0xd6,0x6a,0xf6,0x0c,0x04] + +v_mullit_f32 v5, vcc_hi, 0xaf123456, v255 +// GFX11: encoding: [0x05,0x00,0x18,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] + +v_mullit_f32 v5, -|ttmp15|, -|src_scc|, -|ttmp15| +// GFX11: encoding: [0x05,0x07,0x18,0xd6,0x7b,0xfa,0xed,0xe1] + +v_mullit_f32 v5, m0, 0.5, m0 +// GFX11: encoding: [0x05,0x00,0x18,0xd6,0x7d,0xe0,0xf5,0x01] + +v_mullit_f32 v5, |exec_lo|, -1, vcc_hi +// GFX11: encoding: [0x05,0x01,0x18,0xd6,0x7e,0x82,0xad,0x01] + +v_mullit_f32 v5, -|exec_hi|, null, -|vcc_lo| +// GFX11: encoding: [0x05,0x05,0x18,0xd6,0x7f,0xf8,0xa8,0xa1] + +v_mullit_f32 v5, null, exec_lo, -|0xaf123456| +// GFX11: encoding: [0x05,0x04,0x18,0xd6,0x7c,0xfc,0xfc,0x83,0x56,0x34,0x12,0xaf] + +v_mullit_f32 v5, -1, -|exec_hi|, -|src_scc| +// GFX11: encoding: [0x05,0x06,0x18,0xd6,0xc1,0xfe,0xf4,0xc3] + +v_mullit_f32 v5, 0.5, -m0, 0.5 mul:2 +// GFX11: encoding: [0x05,0x00,0x18,0xd6,0xf0,0xfa,0xc0,0x4b] + +v_mullit_f32 v5, -src_scc, |vcc_lo|, -1 mul:4 +// GFX11: encoding: [0x05,0x02,0x18,0xd6,0xfd,0xd4,0x04,0x33] + +v_mullit_f32 v255, -|0xaf123456|, -|vcc_hi|, null clamp div:2 +// GFX11: encoding: [0xff,0x83,0x18,0xd6,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf] + +v_or3_b32 v5, v1, v2, s3 +// GFX11: encoding: 
[0x05,0x00,0x58,0xd6,0x01,0x05,0x0e,0x00] + +v_or3_b32 v5, v255, s2, s105 +// GFX11: encoding: [0x05,0x00,0x58,0xd6,0xff,0x05,0xa4,0x01] + +v_or3_b32 v5, s1, v255, exec_hi +// GFX11: encoding: [0x05,0x00,0x58,0xd6,0x01,0xfe,0xff,0x01] + +v_or3_b32 v5, s105, s105, exec_lo +// GFX11: encoding: [0x05,0x00,0x58,0xd6,0x69,0xd2,0xf8,0x01] + +v_or3_b32 v5, vcc_lo, ttmp15, v3 +// GFX11: encoding: [0x05,0x00,0x58,0xd6,0x6a,0xf6,0x0c,0x04] + +v_or3_b32 v5, vcc_hi, 0xaf123456, v255 +// GFX11: encoding: [0x05,0x00,0x58,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] + +v_or3_b32 v5, ttmp15, src_scc, ttmp15 +// GFX11: encoding: [0x05,0x00,0x58,0xd6,0x7b,0xfa,0xed,0x01] + +v_or3_b32 v5, m0, 0.5, m0 +// GFX11: encoding: [0x05,0x00,0x58,0xd6,0x7d,0xe0,0xf5,0x01] + +v_or3_b32 v5, exec_lo, -1, vcc_hi +// GFX11: encoding: [0x05,0x00,0x58,0xd6,0x7e,0x82,0xad,0x01] + +v_or3_b32 v5, exec_hi, null, vcc_lo +// GFX11: encoding: [0x05,0x00,0x58,0xd6,0x7f,0xf8,0xa8,0x01] + +v_or3_b32 v5, null, exec_lo, 0xaf123456 +// GFX11: encoding: [0x05,0x00,0x58,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf] + +v_or3_b32 v5, -1, exec_hi, src_scc +// GFX11: encoding: [0x05,0x00,0x58,0xd6,0xc1,0xfe,0xf4,0x03] + +v_or3_b32 v5, 0.5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x58,0xd6,0xf0,0xfa,0xc0,0x03] + +v_or3_b32 v5, src_scc, vcc_lo, -1 +// GFX11: encoding: [0x05,0x00,0x58,0xd6,0xfd,0xd4,0x04,0x03] + +v_or3_b32 v255, 0xaf123456, vcc_hi, null +// GFX11: encoding: [0xff,0x00,0x58,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] + +v_or_b16 v5, v1, v2 +// GFX11: encoding: [0x05,0x00,0x63,0xd7,0x01,0x05,0x02,0x00] + +v_or_b16 v5, v255, v255 +// GFX11: encoding: [0x05,0x00,0x63,0xd7,0xff,0xff,0x03,0x00] + +v_or_b16 v5, s1, s2 +// GFX11: encoding: [0x05,0x00,0x63,0xd7,0x01,0x04,0x00,0x00] + +v_or_b16 v5, s105, s105 +// GFX11: encoding: [0x05,0x00,0x63,0xd7,0x69,0xd2,0x00,0x00] + +v_or_b16 v5, vcc_lo, ttmp15 +// GFX11: encoding: [0x05,0x00,0x63,0xd7,0x6a,0xf6,0x00,0x00] + +v_or_b16 v5, vcc_hi, 0xfe0b +// GFX11: encoding: [0x05,0x00,0x63,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] + +v_or_b16 v5, ttmp15, src_scc +// GFX11: encoding: [0x05,0x00,0x63,0xd7,0x7b,0xfa,0x01,0x00] + +v_or_b16 v5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x63,0xd7,0x7d,0xe0,0x01,0x00] + +v_or_b16 v5, exec_lo, -1 +// GFX11: encoding: [0x05,0x00,0x63,0xd7,0x7e,0x82,0x01,0x00] + +v_or_b16 v5, exec_hi, null +// GFX11: encoding: [0x05,0x00,0x63,0xd7,0x7f,0xf8,0x00,0x00] + +v_or_b16 v5, null, exec_lo +// GFX11: encoding: [0x05,0x00,0x63,0xd7,0x7c,0xfc,0x00,0x00] + +v_or_b16 v5, -1, exec_hi +// GFX11: encoding: [0x05,0x00,0x63,0xd7,0xc1,0xfe,0x00,0x00] + +v_or_b16 v5, 0.5, m0 +// GFX11: encoding: [0x05,0x00,0x63,0xd7,0xf0,0xfa,0x00,0x00] + +v_or_b16 v5, src_scc, vcc_lo +// GFX11: encoding: [0x05,0x00,0x63,0xd7,0xfd,0xd4,0x00,0x00] + +v_or_b16 v255, 0xfe0b, vcc_hi +// GFX11: encoding: [0xff,0x00,0x63,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] + +v_pack_b32_f16 v5, v1, v2 +// GFX11: encoding: [0x05,0x00,0x11,0xd7,0x01,0x05,0x02,0x00] + +v_pack_b32_f16 v5, v255, v255 +// GFX11: encoding: [0x05,0x00,0x11,0xd7,0xff,0xff,0x03,0x00] + +v_pack_b32_f16 v5, s1, s2 +// GFX11: encoding: [0x05,0x00,0x11,0xd7,0x01,0x04,0x00,0x00] + +v_pack_b32_f16 v5, s105, s105 +// GFX11: encoding: [0x05,0x00,0x11,0xd7,0x69,0xd2,0x00,0x00] + +v_pack_b32_f16 v5, vcc_lo, ttmp15 +// GFX11: encoding: [0x05,0x00,0x11,0xd7,0x6a,0xf6,0x00,0x00] + +v_pack_b32_f16 v5, vcc_hi, 0xfe0b +// GFX11: encoding: [0x05,0x00,0x11,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] + +v_pack_b32_f16 v5, ttmp15, src_scc +// 
GFX11: encoding: [0x05,0x00,0x11,0xd7,0x7b,0xfa,0x01,0x00] + +v_pack_b32_f16 v5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x11,0xd7,0x7d,0xe0,0x01,0x00] + +v_pack_b32_f16 v5, exec_lo, -1 +// GFX11: encoding: [0x05,0x00,0x11,0xd7,0x7e,0x82,0x01,0x00] + +v_pack_b32_f16 v5, |exec_hi|, null +// GFX11: encoding: [0x05,0x01,0x11,0xd7,0x7f,0xf8,0x00,0x00] + +v_pack_b32_f16 v5, null, exec_lo +// GFX11: encoding: [0x05,0x00,0x11,0xd7,0x7c,0xfc,0x00,0x00] + +v_pack_b32_f16 v5, -1, exec_hi +// GFX11: encoding: [0x05,0x00,0x11,0xd7,0xc1,0xfe,0x00,0x00] + +v_pack_b32_f16 v5, 0.5, -m0 op_sel:[0,0,0] +// GFX11: encoding: [0x05,0x00,0x11,0xd7,0xf0,0xfa,0x00,0x40] + +v_pack_b32_f16 v5, -src_scc, |vcc_lo| op_sel:[1,0,0] +// GFX11: encoding: [0x05,0x0a,0x11,0xd7,0xfd,0xd4,0x00,0x20] + +v_pack_b32_f16 v255, -|0xfe0b|, -|vcc_hi| op_sel:[0,1,0] +// GFX11: encoding: [0xff,0x13,0x11,0xd7,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00] + +v_perm_b32 v5, v1, v2, s3 +// GFX11: encoding: [0x05,0x00,0x44,0xd6,0x01,0x05,0x0e,0x00] + +v_perm_b32 v5, v255, s2, s105 +// GFX11: encoding: [0x05,0x00,0x44,0xd6,0xff,0x05,0xa4,0x01] + +v_perm_b32 v5, s1, v255, exec_hi +// GFX11: encoding: [0x05,0x00,0x44,0xd6,0x01,0xfe,0xff,0x01] + +v_perm_b32 v5, s105, s105, exec_lo +// GFX11: encoding: [0x05,0x00,0x44,0xd6,0x69,0xd2,0xf8,0x01] + +v_perm_b32 v5, vcc_lo, ttmp15, v3 +// GFX11: encoding: [0x05,0x00,0x44,0xd6,0x6a,0xf6,0x0c,0x04] + +v_perm_b32 v5, vcc_hi, 0xaf123456, v255 +// GFX11: encoding: [0x05,0x00,0x44,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] + +v_perm_b32 v5, ttmp15, src_scc, ttmp15 +// GFX11: encoding: [0x05,0x00,0x44,0xd6,0x7b,0xfa,0xed,0x01] + +v_perm_b32 v5, m0, 0.5, m0 +// GFX11: encoding: [0x05,0x00,0x44,0xd6,0x7d,0xe0,0xf5,0x01] + +v_perm_b32 v5, exec_lo, -1, vcc_hi +// GFX11: encoding: [0x05,0x00,0x44,0xd6,0x7e,0x82,0xad,0x01] + +v_perm_b32 v5, exec_hi, null, vcc_lo +// GFX11: encoding: [0x05,0x00,0x44,0xd6,0x7f,0xf8,0xa8,0x01] + +v_perm_b32 v5, null, exec_lo, 0xaf123456 +// GFX11: encoding: [0x05,0x00,0x44,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf] + +v_perm_b32 v5, -1, exec_hi, src_scc +// GFX11: encoding: [0x05,0x00,0x44,0xd6,0xc1,0xfe,0xf4,0x03] + +v_perm_b32 v5, 0.5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x44,0xd6,0xf0,0xfa,0xc0,0x03] + +v_perm_b32 v5, src_scc, vcc_lo, -1 +// GFX11: encoding: [0x05,0x00,0x44,0xd6,0xfd,0xd4,0x04,0x03] + +v_perm_b32 v255, 0xaf123456, vcc_hi, null +// GFX11: encoding: [0xff,0x00,0x44,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] + +v_permlane16_b32 v5, v1, s2, s3 +// GFX11: encoding: [0x05,0x00,0x5b,0xd6,0x01,0x05,0x0c,0x00] + +v_permlane16_b32 v5, v1, s105, s105 +// GFX11: encoding: [0x05,0x00,0x5b,0xd6,0x01,0xd3,0xa4,0x01] + +v_permlane16_b32 v5, v1, ttmp15, ttmp15 +// GFX11: encoding: [0x05,0x00,0x5b,0xd6,0x01,0xf7,0xec,0x01] + +v_permlane16_b32 v5, v1, vcc_hi, exec_lo +// GFX11: encoding: [0x05,0x00,0x5b,0xd6,0x01,0xd7,0xf8,0x01] + +v_permlane16_b32 v5, v1, vcc_lo, m0 +// GFX11: encoding: [0x05,0x00,0x5b,0xd6,0x01,0xd5,0xf4,0x01] + +v_permlane16_b32 v5, v1, m0, vcc_hi +// GFX11: encoding: [0x05,0x00,0x5b,0xd6,0x01,0xfb,0xac,0x01] + +v_permlane16_b32 v5, v1, exec_hi, vcc_lo +// GFX11: encoding: [0x05,0x00,0x5b,0xd6,0x01,0xff,0xa8,0x01] + +v_permlane16_b32 v5, v1, exec_lo, src_scc +// GFX11: encoding: [0x05,0x00,0x5b,0xd6,0x01,0xfd,0xf4,0x03] + +v_permlane16_b32 v5, v1, null, 0.5 op_sel:[1,1] +// GFX11: encoding: [0x05,0x18,0x5b,0xd6,0x01,0xf9,0xc0,0x03] + +v_permlane16_b32 v5, v1, -1, -1 op_sel:[0,0] +// GFX11: encoding: [0x05,0x00,0x5b,0xd6,0x01,0x83,0x05,0x03] + 
+v_permlane16_b32 v5, v1, 0.5, null op_sel:[1,0] +// GFX11: encoding: [0x05,0x08,0x5b,0xd6,0x01,0xe1,0xf1,0x01] + +v_permlane16_b32 v255, v255, src_scc, exec_hi op_sel:[0,1] +// GFX11: encoding: [0xff,0x10,0x5b,0xd6,0xff,0xfb,0xfd,0x01] + +v_permlane16_b32 v5, v1, 0xaf123456, s3 +// GFX11: encoding: [0x05,0x00,0x5b,0xd6,0x01,0xff,0x0d,0x00,0x56,0x34,0x12,0xaf] + +v_permlane16_b32 v5, v1, s2, 0xaf123456 +// GFX11: encoding: [0x05,0x00,0x5b,0xd6,0x01,0x05,0xfc,0x03,0x56,0x34,0x12,0xaf] + +v_permlane16_b32 v5, v1, 0x12345678, 0x12345678 +// GFX11: encoding: [0x05,0x00,0x5b,0xd6,0x01,0xff,0xfd,0x03,0x78,0x56,0x34,0x12] + +v_permlanex16_b32 v5, v1, s2, s3 +// GFX11: encoding: [0x05,0x00,0x5c,0xd6,0x01,0x05,0x0c,0x00] + +v_permlanex16_b32 v5, v1, s105, s105 +// GFX11: encoding: [0x05,0x00,0x5c,0xd6,0x01,0xd3,0xa4,0x01] + +v_permlanex16_b32 v5, v1, ttmp15, ttmp15 +// GFX11: encoding: [0x05,0x00,0x5c,0xd6,0x01,0xf7,0xec,0x01] + +v_permlanex16_b32 v5, v1, vcc_hi, exec_lo +// GFX11: encoding: [0x05,0x00,0x5c,0xd6,0x01,0xd7,0xf8,0x01] + +v_permlanex16_b32 v5, v1, vcc_lo, m0 +// GFX11: encoding: [0x05,0x00,0x5c,0xd6,0x01,0xd5,0xf4,0x01] + +v_permlanex16_b32 v5, v1, m0, vcc_hi +// GFX11: encoding: [0x05,0x00,0x5c,0xd6,0x01,0xfb,0xac,0x01] + +v_permlanex16_b32 v5, v1, exec_hi, vcc_lo +// GFX11: encoding: [0x05,0x00,0x5c,0xd6,0x01,0xff,0xa8,0x01] + +v_permlanex16_b32 v5, v1, exec_lo, src_scc +// GFX11: encoding: [0x05,0x00,0x5c,0xd6,0x01,0xfd,0xf4,0x03] + +v_permlanex16_b32 v5, v1, null, 0.5 op_sel:[1,1] +// GFX11: encoding: [0x05,0x18,0x5c,0xd6,0x01,0xf9,0xc0,0x03] + +v_permlanex16_b32 v5, v1, -1, -1 op_sel:[0,0] +// GFX11: encoding: [0x05,0x00,0x5c,0xd6,0x01,0x83,0x05,0x03] + +v_permlanex16_b32 v5, v1, 0.5, null op_sel:[1,0] +// GFX11: encoding: [0x05,0x08,0x5c,0xd6,0x01,0xe1,0xf1,0x01] + +v_permlanex16_b32 v255, v255, src_scc, exec_hi op_sel:[0,1] +// GFX11: encoding: [0xff,0x10,0x5c,0xd6,0xff,0xfb,0xfd,0x01] + +v_permlanex16_b32 v5, v1, 0xaf123456, s3 +// GFX11: encoding: [0x05,0x00,0x5c,0xd6,0x01,0xff,0x0d,0x00,0x56,0x34,0x12,0xaf] + +v_permlanex16_b32 v5, v1, s2, 0xaf123456 +// GFX11: encoding: [0x05,0x00,0x5c,0xd6,0x01,0x05,0xfc,0x03,0x56,0x34,0x12,0xaf] + +v_permlanex16_b32 v5, v1, 0x12345678, 0x12345678 +// GFX11: encoding: [0x05,0x00,0x5c,0xd6,0x01,0xff,0xfd,0x03,0x78,0x56,0x34,0x12] + +v_qsad_pk_u16_u8 v[5:6], v[1:2], v2, ttmp[14:15] +// GFX11: encoding: [0x05,0x00,0x3a,0xd6,0x01,0x05,0xea,0x01] + +v_qsad_pk_u16_u8 v[5:6], v[1:2], v255, ttmp[14:15] +// GFX11: encoding: [0x05,0x00,0x3a,0xd6,0x01,0xff,0xeb,0x01] + +v_qsad_pk_u16_u8 v[5:6], v[1:2], s2, ttmp[14:15] +// GFX11: encoding: [0x05,0x00,0x3a,0xd6,0x01,0x05,0xe8,0x01] + +v_qsad_pk_u16_u8 v[5:6], v[1:2], s105, ttmp[14:15] +// GFX11: encoding: [0x05,0x00,0x3a,0xd6,0x01,0xd3,0xe8,0x01] + +v_qsad_pk_u16_u8 v[5:6], v[254:255], ttmp15, s[6:7] +// GFX11: encoding: [0x05,0x00,0x3a,0xd6,0xfe,0xf7,0x18,0x00] + +v_qsad_pk_u16_u8 v[5:6], s[2:3], vcc_hi, v[3:4] +// GFX11: encoding: [0x05,0x00,0x3a,0xd6,0x02,0xd6,0x0c,0x04] + +v_qsad_pk_u16_u8 v[5:6], s[104:105], vcc_lo, s[104:105] +// GFX11: encoding: [0x05,0x00,0x3a,0xd6,0x68,0xd4,0xa0,0x01] + +v_qsad_pk_u16_u8 v[5:6], vcc, m0, v[254:255] +// GFX11: encoding: [0x05,0x00,0x3a,0xd6,0x6a,0xfa,0xf8,0x07] + +v_qsad_pk_u16_u8 v[5:6], ttmp[14:15], exec_hi, null +// GFX11: encoding: [0x05,0x00,0x3a,0xd6,0x7a,0xfe,0xf0,0x01] + +v_qsad_pk_u16_u8 v[5:6], exec, exec_lo, exec +// GFX11: encoding: [0x05,0x00,0x3a,0xd6,0x7e,0xfc,0xf8,0x01] + +v_qsad_pk_u16_u8 v[5:6], null, null, vcc +// GFX11: encoding: 
[0x05,0x00,0x3a,0xd6,0x7c,0xf8,0xa8,0x01] + +v_qsad_pk_u16_u8 v[5:6], -1, -1, 0xaf123456 +// GFX11: encoding: [0x05,0x00,0x3a,0xd6,0xc1,0x82,0xfd,0x03,0x56,0x34,0x12,0xaf] + +v_qsad_pk_u16_u8 v[5:6], 0.5, 0.5, src_scc +// GFX11: encoding: [0x05,0x00,0x3a,0xd6,0xf0,0xe0,0xf5,0x03] + +v_qsad_pk_u16_u8 v[5:6], src_scc, src_scc, 0.5 +// GFX11: encoding: [0x05,0x00,0x3a,0xd6,0xfd,0xfa,0xc1,0x03] + +v_qsad_pk_u16_u8 v[254:255], 0xaf123456, 0xaf123456, -1 clamp +// GFX11: encoding: [0xfe,0x80,0x3a,0xd6,0xff,0xfe,0x05,0x03,0x56,0x34,0x12,0xaf] + +v_readlane_b32 s5, v1, s2 +// GFX11: encoding: [0x05,0x00,0x60,0xd7,0x01,0x05,0x00,0x00] + +v_readlane_b32 s5, v1, s105 +// GFX11: encoding: [0x05,0x00,0x60,0xd7,0x01,0xd3,0x00,0x00] + +v_readlane_b32 s105, v1, ttmp15 +// GFX11: encoding: [0x69,0x00,0x60,0xd7,0x01,0xf7,0x00,0x00] + +v_readlane_b32 vcc_lo, v1, vcc_hi +// GFX11: encoding: [0x6a,0x00,0x60,0xd7,0x01,0xd7,0x00,0x00] + +v_readlane_b32 vcc_hi, v1, vcc_lo +// GFX11: encoding: [0x6b,0x00,0x60,0xd7,0x01,0xd5,0x00,0x00] + +v_readlane_b32 ttmp15, v1, m0 +// GFX11: encoding: [0x7b,0x00,0x60,0xd7,0x01,0xfb,0x00,0x00] + +v_readlane_b32 null, v255, null +// GFX11: encoding: [0x7c,0x00,0x60,0xd7,0xff,0xf9,0x00,0x00] + +v_sad_hi_u8 v5, v1, v2, s3 +// GFX11: encoding: [0x05,0x00,0x23,0xd6,0x01,0x05,0x0e,0x00] + +v_sad_hi_u8 v5, v255, s2, s105 +// GFX11: encoding: [0x05,0x00,0x23,0xd6,0xff,0x05,0xa4,0x01] + +v_sad_hi_u8 v5, s1, v255, exec_hi +// GFX11: encoding: [0x05,0x00,0x23,0xd6,0x01,0xfe,0xff,0x01] + +v_sad_hi_u8 v5, s105, s105, exec_lo +// GFX11: encoding: [0x05,0x00,0x23,0xd6,0x69,0xd2,0xf8,0x01] + +v_sad_hi_u8 v5, vcc_lo, ttmp15, v3 +// GFX11: encoding: [0x05,0x00,0x23,0xd6,0x6a,0xf6,0x0c,0x04] + +v_sad_hi_u8 v5, vcc_hi, 0xaf123456, v255 +// GFX11: encoding: [0x05,0x00,0x23,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] + +v_sad_hi_u8 v5, ttmp15, src_scc, ttmp15 +// GFX11: encoding: [0x05,0x00,0x23,0xd6,0x7b,0xfa,0xed,0x01] + +v_sad_hi_u8 v5, m0, 0.5, m0 +// GFX11: encoding: [0x05,0x00,0x23,0xd6,0x7d,0xe0,0xf5,0x01] + +v_sad_hi_u8 v5, exec_lo, -1, vcc_hi +// GFX11: encoding: [0x05,0x00,0x23,0xd6,0x7e,0x82,0xad,0x01] + +v_sad_hi_u8 v5, exec_hi, null, vcc_lo +// GFX11: encoding: [0x05,0x00,0x23,0xd6,0x7f,0xf8,0xa8,0x01] + +v_sad_hi_u8 v5, null, exec_lo, 0xaf123456 +// GFX11: encoding: [0x05,0x00,0x23,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf] + +v_sad_hi_u8 v5, -1, exec_hi, src_scc +// GFX11: encoding: [0x05,0x00,0x23,0xd6,0xc1,0xfe,0xf4,0x03] + +v_sad_hi_u8 v5, 0.5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x23,0xd6,0xf0,0xfa,0xc0,0x03] + +v_sad_hi_u8 v5, src_scc, vcc_lo, -1 +// GFX11: encoding: [0x05,0x00,0x23,0xd6,0xfd,0xd4,0x04,0x03] + +v_sad_hi_u8 v255, 0xaf123456, vcc_hi, null clamp +// GFX11: encoding: [0xff,0x80,0x23,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] + +v_sad_u16 v5, v1, v2, s3 +// GFX11: encoding: [0x05,0x00,0x24,0xd6,0x01,0x05,0x0e,0x00] + +v_sad_u16 v5, v255, s2, s105 +// GFX11: encoding: [0x05,0x00,0x24,0xd6,0xff,0x05,0xa4,0x01] + +v_sad_u16 v5, s1, v255, exec_hi +// GFX11: encoding: [0x05,0x00,0x24,0xd6,0x01,0xfe,0xff,0x01] + +v_sad_u16 v5, s105, s105, exec_lo +// GFX11: encoding: [0x05,0x00,0x24,0xd6,0x69,0xd2,0xf8,0x01] + +v_sad_u16 v5, vcc_lo, ttmp15, v3 +// GFX11: encoding: [0x05,0x00,0x24,0xd6,0x6a,0xf6,0x0c,0x04] + +v_sad_u16 v5, vcc_hi, 0xfe0b, v255 +// GFX11: encoding: [0x05,0x00,0x24,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] + +v_sad_u16 v5, ttmp15, src_scc, ttmp15 +// GFX11: encoding: [0x05,0x00,0x24,0xd6,0x7b,0xfa,0xed,0x01] + +v_sad_u16 v5, m0, 0.5, m0 +// 
GFX11: encoding: [0x05,0x00,0x24,0xd6,0x7d,0xe0,0xf5,0x01] + +v_sad_u16 v5, exec_lo, -1, vcc_hi +// GFX11: encoding: [0x05,0x00,0x24,0xd6,0x7e,0x82,0xad,0x01] + +v_sad_u16 v5, exec_hi, null, vcc_lo +// GFX11: encoding: [0x05,0x00,0x24,0xd6,0x7f,0xf8,0xa8,0x01] + +v_sad_u16 v5, null, exec_lo, 0xaf123456 +// GFX11: encoding: [0x05,0x00,0x24,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf] + +v_sad_u16 v5, -1, exec_hi, src_scc +// GFX11: encoding: [0x05,0x00,0x24,0xd6,0xc1,0xfe,0xf4,0x03] + +v_sad_u16 v5, 0.5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x24,0xd6,0xf0,0xfa,0xc0,0x03] + +v_sad_u16 v5, src_scc, vcc_lo, -1 +// GFX11: encoding: [0x05,0x00,0x24,0xd6,0xfd,0xd4,0x04,0x03] + +v_sad_u16 v255, 0xfe0b, vcc_hi, null clamp +// GFX11: encoding: [0xff,0x80,0x24,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] + +v_sad_u32 v5, v1, v2, s3 +// GFX11: encoding: [0x05,0x00,0x25,0xd6,0x01,0x05,0x0e,0x00] + +v_sad_u32 v5, v255, s2, s105 +// GFX11: encoding: [0x05,0x00,0x25,0xd6,0xff,0x05,0xa4,0x01] + +v_sad_u32 v5, s1, v255, exec_hi +// GFX11: encoding: [0x05,0x00,0x25,0xd6,0x01,0xfe,0xff,0x01] + +v_sad_u32 v5, s105, s105, exec_lo +// GFX11: encoding: [0x05,0x00,0x25,0xd6,0x69,0xd2,0xf8,0x01] + +v_sad_u32 v5, vcc_lo, ttmp15, v3 +// GFX11: encoding: [0x05,0x00,0x25,0xd6,0x6a,0xf6,0x0c,0x04] + +v_sad_u32 v5, vcc_hi, 0xaf123456, v255 +// GFX11: encoding: [0x05,0x00,0x25,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] + +v_sad_u32 v5, ttmp15, src_scc, ttmp15 +// GFX11: encoding: [0x05,0x00,0x25,0xd6,0x7b,0xfa,0xed,0x01] + +v_sad_u32 v5, m0, 0.5, m0 +// GFX11: encoding: [0x05,0x00,0x25,0xd6,0x7d,0xe0,0xf5,0x01] + +v_sad_u32 v5, exec_lo, -1, vcc_hi +// GFX11: encoding: [0x05,0x00,0x25,0xd6,0x7e,0x82,0xad,0x01] + +v_sad_u32 v5, exec_hi, null, vcc_lo +// GFX11: encoding: [0x05,0x00,0x25,0xd6,0x7f,0xf8,0xa8,0x01] + +v_sad_u32 v5, null, exec_lo, 0xaf123456 +// GFX11: encoding: [0x05,0x00,0x25,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf] + +v_sad_u32 v5, -1, exec_hi, src_scc +// GFX11: encoding: [0x05,0x00,0x25,0xd6,0xc1,0xfe,0xf4,0x03] + +v_sad_u32 v5, 0.5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x25,0xd6,0xf0,0xfa,0xc0,0x03] + +v_sad_u32 v5, src_scc, vcc_lo, -1 +// GFX11: encoding: [0x05,0x00,0x25,0xd6,0xfd,0xd4,0x04,0x03] + +v_sad_u32 v255, 0xaf123456, vcc_hi, null clamp +// GFX11: encoding: [0xff,0x80,0x25,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] + +v_sad_u8 v5, v1, v2, s3 +// GFX11: encoding: [0x05,0x00,0x22,0xd6,0x01,0x05,0x0e,0x00] + +v_sad_u8 v5, v255, s2, s105 +// GFX11: encoding: [0x05,0x00,0x22,0xd6,0xff,0x05,0xa4,0x01] + +v_sad_u8 v5, s1, v255, exec_hi +// GFX11: encoding: [0x05,0x00,0x22,0xd6,0x01,0xfe,0xff,0x01] + +v_sad_u8 v5, s105, s105, exec_lo +// GFX11: encoding: [0x05,0x00,0x22,0xd6,0x69,0xd2,0xf8,0x01] + +v_sad_u8 v5, vcc_lo, ttmp15, v3 +// GFX11: encoding: [0x05,0x00,0x22,0xd6,0x6a,0xf6,0x0c,0x04] + +v_sad_u8 v5, vcc_hi, 0xaf123456, v255 +// GFX11: encoding: [0x05,0x00,0x22,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] + +v_sad_u8 v5, ttmp15, src_scc, ttmp15 +// GFX11: encoding: [0x05,0x00,0x22,0xd6,0x7b,0xfa,0xed,0x01] + +v_sad_u8 v5, m0, 0.5, m0 +// GFX11: encoding: [0x05,0x00,0x22,0xd6,0x7d,0xe0,0xf5,0x01] + +v_sad_u8 v5, exec_lo, -1, vcc_hi +// GFX11: encoding: [0x05,0x00,0x22,0xd6,0x7e,0x82,0xad,0x01] + +v_sad_u8 v5, exec_hi, null, vcc_lo +// GFX11: encoding: [0x05,0x00,0x22,0xd6,0x7f,0xf8,0xa8,0x01] + +v_sad_u8 v5, null, exec_lo, 0xaf123456 +// GFX11: encoding: [0x05,0x00,0x22,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf] + +v_sad_u8 v5, -1, exec_hi, src_scc +// GFX11: encoding: 
[0x05,0x00,0x22,0xd6,0xc1,0xfe,0xf4,0x03] + +v_sad_u8 v5, 0.5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x22,0xd6,0xf0,0xfa,0xc0,0x03] + +v_sad_u8 v5, src_scc, vcc_lo, -1 +// GFX11: encoding: [0x05,0x00,0x22,0xd6,0xfd,0xd4,0x04,0x03] + +v_sad_u8 v255, 0xaf123456, vcc_hi, null clamp +// GFX11: encoding: [0xff,0x80,0x22,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] + +v_sub_co_u32 v5, s6, v1, v2 +// W32: encoding: [0x05,0x06,0x01,0xd7,0x01,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_u32 v5, s6, v255, v255 +// W32: encoding: [0x05,0x06,0x01,0xd7,0xff,0xff,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_u32 v5, s6, s1, s2 +// W32: encoding: [0x05,0x06,0x01,0xd7,0x01,0x04,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_u32 v5, s6, s105, s105 +// W32: encoding: [0x05,0x06,0x01,0xd7,0x69,0xd2,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_u32 v5, s6, vcc_lo, ttmp15 +// W32: encoding: [0x05,0x06,0x01,0xd7,0x6a,0xf6,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_u32 v5, s6, vcc_hi, 0xaf123456 +// W32: encoding: [0x05,0x06,0x01,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_u32 v5, s6, ttmp15, src_scc +// W32: encoding: [0x05,0x06,0x01,0xd7,0x7b,0xfa,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_u32 v5, s6, m0, 0.5 +// W32: encoding: [0x05,0x06,0x01,0xd7,0x7d,0xe0,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_u32 v5, s6, exec_lo, -1 +// W32: encoding: [0x05,0x06,0x01,0xd7,0x7e,0x82,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_u32 v5, s6, exec_hi, null +// W32: encoding: [0x05,0x06,0x01,0xd7,0x7f,0xf8,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_u32 v5, s105, null, exec_lo +// W32: encoding: [0x05,0x69,0x01,0xd7,0x7c,0xfc,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_u32 v5, vcc_lo, -1, exec_hi +// W32: encoding: [0x05,0x6a,0x01,0xd7,0xc1,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_u32 v5, vcc_hi, 0.5, m0 +// W32: encoding: [0x05,0x6b,0x01,0xd7,0xf0,0xfa,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_u32 v5, ttmp15, src_scc, vcc_lo +// W32: encoding: [0x05,0x7b,0x01,0xd7,0xfd,0xd4,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_u32 v5, s[12:13], v1, v2 +// W64: encoding: [0x05,0x0c,0x01,0xd7,0x01,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_u32 v5, s[12:13], v255, v255 +// W64: encoding: [0x05,0x0c,0x01,0xd7,0xff,0xff,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_u32 v5, s[12:13], s1, s2 +// W64: encoding: [0x05,0x0c,0x01,0xd7,0x01,0x04,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_u32 v5, s[12:13], s105, s105 +// W64: encoding: [0x05,0x0c,0x01,0xd7,0x69,0xd2,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + 
+v_sub_co_u32 v5, s[12:13], vcc_lo, ttmp15 +// W64: encoding: [0x05,0x0c,0x01,0xd7,0x6a,0xf6,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_u32 v5, s[12:13], vcc_hi, 0xaf123456 +// W64: encoding: [0x05,0x0c,0x01,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_u32 v5, s[12:13], ttmp15, src_scc +// W64: encoding: [0x05,0x0c,0x01,0xd7,0x7b,0xfa,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_u32 v5, s[12:13], m0, 0.5 +// W64: encoding: [0x05,0x0c,0x01,0xd7,0x7d,0xe0,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_u32 v5, s[12:13], exec_lo, -1 +// W64: encoding: [0x05,0x0c,0x01,0xd7,0x7e,0x82,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_u32 v5, s[12:13], exec_hi, null +// W64: encoding: [0x05,0x0c,0x01,0xd7,0x7f,0xf8,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_u32 v5, s[12:13], null, exec_lo +// W64: encoding: [0x05,0x0c,0x01,0xd7,0x7c,0xfc,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_u32 v5, s[104:105], -1, exec_hi +// W64: encoding: [0x05,0x68,0x01,0xd7,0xc1,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_u32 v5, vcc, 0.5, m0 +// W64: encoding: [0x05,0x6a,0x01,0xd7,0xf0,0xfa,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_u32 v5, ttmp[14:15], src_scc, vcc_lo +// W64: encoding: [0x05,0x7a,0x01,0xd7,0xfd,0xd4,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_u32 v255, null, 0xaf123456, vcc_hi clamp +// GFX11: encoding: [0xff,0xfc,0x01,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] + +v_sub_nc_i16 v5, v1, v2 +// GFX11: encoding: [0x05,0x00,0x0e,0xd7,0x01,0x05,0x02,0x00] + +v_sub_nc_i16 v5, v255, v255 +// GFX11: encoding: [0x05,0x00,0x0e,0xd7,0xff,0xff,0x03,0x00] + +v_sub_nc_i16 v5, s1, s2 +// GFX11: encoding: [0x05,0x00,0x0e,0xd7,0x01,0x04,0x00,0x00] + +v_sub_nc_i16 v5, s105, s105 +// GFX11: encoding: [0x05,0x00,0x0e,0xd7,0x69,0xd2,0x00,0x00] + +v_sub_nc_i16 v5, vcc_lo, ttmp15 +// GFX11: encoding: [0x05,0x00,0x0e,0xd7,0x6a,0xf6,0x00,0x00] + +v_sub_nc_i16 v5, vcc_hi, 0xfe0b +// GFX11: encoding: [0x05,0x00,0x0e,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] + +v_sub_nc_i16 v5, ttmp15, src_scc +// GFX11: encoding: [0x05,0x00,0x0e,0xd7,0x7b,0xfa,0x01,0x00] + +v_sub_nc_i16 v5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x0e,0xd7,0x7d,0xe0,0x01,0x00] + +v_sub_nc_i16 v5, exec_lo, -1 +// GFX11: encoding: [0x05,0x00,0x0e,0xd7,0x7e,0x82,0x01,0x00] + +v_sub_nc_i16 v5, exec_hi, null +// GFX11: encoding: [0x05,0x00,0x0e,0xd7,0x7f,0xf8,0x00,0x00] + +v_sub_nc_i16 v5, null, exec_lo op_sel:[1,1,1] +// GFX11: encoding: [0x05,0x58,0x0e,0xd7,0x7c,0xfc,0x00,0x00] + +v_sub_nc_i16 v5, -1, exec_hi op_sel:[0,0,0] +// GFX11: encoding: [0x05,0x00,0x0e,0xd7,0xc1,0xfe,0x00,0x00] + +v_sub_nc_i16 v5, 0.5, m0 op_sel:[1,0,0] +// GFX11: encoding: [0x05,0x08,0x0e,0xd7,0xf0,0xfa,0x00,0x00] + +v_sub_nc_i16 v5, src_scc, vcc_lo op_sel:[0,1,0] +// GFX11: encoding: [0x05,0x10,0x0e,0xd7,0xfd,0xd4,0x00,0x00] + +v_sub_nc_i16 v255, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp +// GFX11: encoding: [0xff,0xc0,0x0e,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] + +v_sub_nc_i32 v5, v1, 
v2 +// GFX11: encoding: [0x05,0x00,0x25,0xd7,0x01,0x05,0x02,0x00] + +v_sub_nc_i32 v5, v255, v255 +// GFX11: encoding: [0x05,0x00,0x25,0xd7,0xff,0xff,0x03,0x00] + +v_sub_nc_i32 v5, s1, s2 +// GFX11: encoding: [0x05,0x00,0x25,0xd7,0x01,0x04,0x00,0x00] + +v_sub_nc_i32 v5, s105, s105 +// GFX11: encoding: [0x05,0x00,0x25,0xd7,0x69,0xd2,0x00,0x00] + +v_sub_nc_i32 v5, vcc_lo, ttmp15 +// GFX11: encoding: [0x05,0x00,0x25,0xd7,0x6a,0xf6,0x00,0x00] + +v_sub_nc_i32 v5, vcc_hi, 0xaf123456 +// GFX11: encoding: [0x05,0x00,0x25,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] + +v_sub_nc_i32 v5, ttmp15, src_scc +// GFX11: encoding: [0x05,0x00,0x25,0xd7,0x7b,0xfa,0x01,0x00] + +v_sub_nc_i32 v5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x25,0xd7,0x7d,0xe0,0x01,0x00] + +v_sub_nc_i32 v5, exec_lo, -1 +// GFX11: encoding: [0x05,0x00,0x25,0xd7,0x7e,0x82,0x01,0x00] + +v_sub_nc_i32 v5, exec_hi, null +// GFX11: encoding: [0x05,0x00,0x25,0xd7,0x7f,0xf8,0x00,0x00] + +v_sub_nc_i32 v5, null, exec_lo +// GFX11: encoding: [0x05,0x00,0x25,0xd7,0x7c,0xfc,0x00,0x00] + +v_sub_nc_i32 v5, -1, exec_hi +// GFX11: encoding: [0x05,0x00,0x25,0xd7,0xc1,0xfe,0x00,0x00] + +v_sub_nc_i32 v5, 0.5, m0 +// GFX11: encoding: [0x05,0x00,0x25,0xd7,0xf0,0xfa,0x00,0x00] + +v_sub_nc_i32 v5, src_scc, vcc_lo +// GFX11: encoding: [0x05,0x00,0x25,0xd7,0xfd,0xd4,0x00,0x00] + +v_sub_nc_i32 v255, 0xaf123456, vcc_hi clamp +// GFX11: encoding: [0xff,0x80,0x25,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] + +v_sub_nc_u16 v5, v1, v2 +// GFX11: encoding: [0x05,0x00,0x04,0xd7,0x01,0x05,0x02,0x00] + +v_sub_nc_u16 v5, v255, v255 +// GFX11: encoding: [0x05,0x00,0x04,0xd7,0xff,0xff,0x03,0x00] + +v_sub_nc_u16 v5, s1, s2 +// GFX11: encoding: [0x05,0x00,0x04,0xd7,0x01,0x04,0x00,0x00] + +v_sub_nc_u16 v5, s105, s105 +// GFX11: encoding: [0x05,0x00,0x04,0xd7,0x69,0xd2,0x00,0x00] + +v_sub_nc_u16 v5, vcc_lo, ttmp15 +// GFX11: encoding: [0x05,0x00,0x04,0xd7,0x6a,0xf6,0x00,0x00] + +v_sub_nc_u16 v5, vcc_hi, 0xfe0b +// GFX11: encoding: [0x05,0x00,0x04,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] + +v_sub_nc_u16 v5, ttmp15, src_scc +// GFX11: encoding: [0x05,0x00,0x04,0xd7,0x7b,0xfa,0x01,0x00] + +v_sub_nc_u16 v5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x04,0xd7,0x7d,0xe0,0x01,0x00] + +v_sub_nc_u16 v5, exec_lo, -1 +// GFX11: encoding: [0x05,0x00,0x04,0xd7,0x7e,0x82,0x01,0x00] + +v_sub_nc_u16 v5, exec_hi, null +// GFX11: encoding: [0x05,0x00,0x04,0xd7,0x7f,0xf8,0x00,0x00] + +v_sub_nc_u16 v5, null, exec_lo op_sel:[1,1,1] +// GFX11: encoding: [0x05,0x58,0x04,0xd7,0x7c,0xfc,0x00,0x00] + +v_sub_nc_u16 v5, -1, exec_hi op_sel:[0,0,0] +// GFX11: encoding: [0x05,0x00,0x04,0xd7,0xc1,0xfe,0x00,0x00] + +v_sub_nc_u16 v5, 0.5, m0 op_sel:[1,0,0] +// GFX11: encoding: [0x05,0x08,0x04,0xd7,0xf0,0xfa,0x00,0x00] + +v_sub_nc_u16 v5, src_scc, vcc_lo op_sel:[0,1,0] +// GFX11: encoding: [0x05,0x10,0x04,0xd7,0xfd,0xd4,0x00,0x00] + +v_sub_nc_u16 v255, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp +// GFX11: encoding: [0xff,0xc0,0x04,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] + +v_subrev_co_u32 v5, s6, v1, v2 +// W32: encoding: [0x05,0x06,0x02,0xd7,0x01,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_u32 v5, s6, v255, v255 +// W32: encoding: [0x05,0x06,0x02,0xd7,0xff,0xff,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_u32 v5, s6, s1, s2 +// W32: encoding: [0x05,0x06,0x02,0xd7,0x01,0x04,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_u32 v5, s6, 
s105, s105 +// W32: encoding: [0x05,0x06,0x02,0xd7,0x69,0xd2,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_u32 v5, s6, vcc_lo, ttmp15 +// W32: encoding: [0x05,0x06,0x02,0xd7,0x6a,0xf6,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_u32 v5, s6, vcc_hi, 0xaf123456 +// W32: encoding: [0x05,0x06,0x02,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_u32 v5, s6, ttmp15, src_scc +// W32: encoding: [0x05,0x06,0x02,0xd7,0x7b,0xfa,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_u32 v5, s6, m0, 0.5 +// W32: encoding: [0x05,0x06,0x02,0xd7,0x7d,0xe0,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_u32 v5, s6, exec_lo, -1 +// W32: encoding: [0x05,0x06,0x02,0xd7,0x7e,0x82,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_u32 v5, s6, exec_hi, null +// W32: encoding: [0x05,0x06,0x02,0xd7,0x7f,0xf8,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_u32 v5, s105, null, exec_lo +// W32: encoding: [0x05,0x69,0x02,0xd7,0x7c,0xfc,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_u32 v5, vcc_lo, -1, exec_hi +// W32: encoding: [0x05,0x6a,0x02,0xd7,0xc1,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_u32 v5, vcc_hi, 0.5, m0 +// W32: encoding: [0x05,0x6b,0x02,0xd7,0xf0,0xfa,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_u32 v5, ttmp15, src_scc, vcc_lo +// W32: encoding: [0x05,0x7b,0x02,0xd7,0xfd,0xd4,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_u32 v5, s[12:13], v1, v2 +// W64: encoding: [0x05,0x0c,0x02,0xd7,0x01,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_u32 v5, s[12:13], v255, v255 +// W64: encoding: [0x05,0x0c,0x02,0xd7,0xff,0xff,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_u32 v5, s[12:13], s1, s2 +// W64: encoding: [0x05,0x0c,0x02,0xd7,0x01,0x04,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_u32 v5, s[12:13], s105, s105 +// W64: encoding: [0x05,0x0c,0x02,0xd7,0x69,0xd2,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_u32 v5, s[12:13], vcc_lo, ttmp15 +// W64: encoding: [0x05,0x0c,0x02,0xd7,0x6a,0xf6,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_u32 v5, s[12:13], vcc_hi, 0xaf123456 +// W64: encoding: [0x05,0x0c,0x02,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_u32 v5, s[12:13], ttmp15, src_scc +// W64: encoding: [0x05,0x0c,0x02,0xd7,0x7b,0xfa,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_u32 v5, s[12:13], m0, 0.5 +// W64: encoding: [0x05,0x0c,0x02,0xd7,0x7d,0xe0,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_u32 v5, s[12:13], exec_lo, -1 +// W64: encoding: [0x05,0x0c,0x02,0xd7,0x7e,0x82,0x01,0x00] 
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_u32 v5, s[12:13], exec_hi, null +// W64: encoding: [0x05,0x0c,0x02,0xd7,0x7f,0xf8,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_u32 v5, s[12:13], null, exec_lo +// W64: encoding: [0x05,0x0c,0x02,0xd7,0x7c,0xfc,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_u32 v5, s[104:105], -1, exec_hi +// W64: encoding: [0x05,0x68,0x02,0xd7,0xc1,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_u32 v5, vcc, 0.5, m0 +// W64: encoding: [0x05,0x6a,0x02,0xd7,0xf0,0xfa,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_u32 v5, ttmp[14:15], src_scc, vcc_lo +// W64: encoding: [0x05,0x7a,0x02,0xd7,0xfd,0xd4,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_u32 v255, null, 0xaf123456, vcc_hi clamp +// GFX11: encoding: [0xff,0xfc,0x02,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] + +v_trig_preop_f64 v[5:6], v[1:2], v2 +// GFX11: encoding: [0x05,0x00,0x2f,0xd7,0x01,0x05,0x02,0x00] + +v_trig_preop_f64 v[5:6], v[1:2], v255 +// GFX11: encoding: [0x05,0x00,0x2f,0xd7,0x01,0xff,0x03,0x00] + +v_trig_preop_f64 v[5:6], v[1:2], s2 +// GFX11: encoding: [0x05,0x00,0x2f,0xd7,0x01,0x05,0x00,0x00] + +v_trig_preop_f64 v[5:6], v[1:2], s105 +// GFX11: encoding: [0x05,0x00,0x2f,0xd7,0x01,0xd3,0x00,0x00] + +v_trig_preop_f64 v[5:6], v[254:255], ttmp15 +// GFX11: encoding: [0x05,0x00,0x2f,0xd7,0xfe,0xf7,0x00,0x00] + +v_trig_preop_f64 v[5:6], s[2:3], vcc_hi +// GFX11: encoding: [0x05,0x00,0x2f,0xd7,0x02,0xd6,0x00,0x00] + +v_trig_preop_f64 v[5:6], s[104:105], vcc_lo +// GFX11: encoding: [0x05,0x00,0x2f,0xd7,0x68,0xd4,0x00,0x00] + +v_trig_preop_f64 v[5:6], vcc, m0 +// GFX11: encoding: [0x05,0x00,0x2f,0xd7,0x6a,0xfa,0x00,0x00] + +v_trig_preop_f64 v[5:6], ttmp[14:15], exec_hi +// GFX11: encoding: [0x05,0x00,0x2f,0xd7,0x7a,0xfe,0x00,0x00] + +v_trig_preop_f64 v[5:6], exec, exec_lo +// GFX11: encoding: [0x05,0x00,0x2f,0xd7,0x7e,0xfc,0x00,0x00] + +v_trig_preop_f64 v[5:6], null, null +// GFX11: encoding: [0x05,0x00,0x2f,0xd7,0x7c,0xf8,0x00,0x00] + +v_trig_preop_f64 v[5:6], -1, -1 +// GFX11: encoding: [0x05,0x00,0x2f,0xd7,0xc1,0x82,0x01,0x00] + +v_trig_preop_f64 v[5:6], 0.5, 0.5 mul:2 +// GFX11: encoding: [0x05,0x00,0x2f,0xd7,0xf0,0xe0,0x01,0x08] + +v_trig_preop_f64 v[5:6], -|src_scc|, src_scc mul:4 +// GFX11: encoding: [0x05,0x01,0x2f,0xd7,0xfd,0xfa,0x01,0x30] + +v_trig_preop_f64 v[254:255], 0xaf123456, 0xaf123456 clamp div:2 +// GFX11: encoding: [0xfe,0x80,0x2f,0xd7,0xff,0xfe,0x01,0x18,0x56,0x34,0x12,0xaf] + +v_writelane_b32 v5, s1, s2 +// GFX11: encoding: [0x05,0x00,0x61,0xd7,0x01,0x04,0x00,0x00] + +v_writelane_b32 v5, s105, s2 +// GFX11: encoding: [0x05,0x00,0x61,0xd7,0x69,0x04,0x00,0x00] + +v_writelane_b32 v5, vcc_lo, s2 +// GFX11: encoding: [0x05,0x00,0x61,0xd7,0x6a,0x04,0x00,0x00] + +v_writelane_b32 v5, vcc_hi, s2 +// GFX11: encoding: [0x05,0x00,0x61,0xd7,0x6b,0x04,0x00,0x00] + +v_writelane_b32 v5, ttmp15, s2 +// GFX11: encoding: [0x05,0x00,0x61,0xd7,0x7b,0x04,0x00,0x00] + +v_writelane_b32 v5, m0, s2 +// GFX11: encoding: [0x05,0x00,0x61,0xd7,0x7d,0x04,0x00,0x00] + +v_writelane_b32 v5, exec_lo, s2 +// GFX11: encoding: [0x05,0x00,0x61,0xd7,0x7e,0x04,0x00,0x00] + +v_writelane_b32 v5, exec_hi, s105 +// GFX11: encoding: [0x05,0x00,0x61,0xd7,0x7f,0xd2,0x00,0x00] + 
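+// Worked reading of the literal-operand entries in this file (taken from
+// the listed bytes themselves rather than restated from hardware docs): a
+// VOP3 instruction is eight bytes, and a 32-bit literal such as 0xaf123456
+// cannot fit in a 9-bit source field. The field is instead set to 0xff,
+// the "literal follows" selector, and the constant is appended
+// little-endian as four trailing bytes, 0x56,0x34,0x12,0xaf, which is why
+// every 0xaf123456 and 0xfe0b case above carries a 12-byte encoding.
+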
+v_writelane_b32 v5, null, ttmp15 +// GFX11: encoding: [0x05,0x00,0x61,0xd7,0x7c,0xf6,0x00,0x00] + +v_writelane_b32 v5, -1, null +// GFX11: encoding: [0x05,0x00,0x61,0xd7,0xc1,0xf8,0x00,0x00] + +v_writelane_b32 v5, 0.5, m0 +// GFX11: encoding: [0x05,0x00,0x61,0xd7,0xf0,0xfa,0x00,0x00] + +v_writelane_b32 v5, src_scc, vcc_lo +// GFX11: encoding: [0x05,0x00,0x61,0xd7,0xfd,0xd4,0x00,0x00] + +v_writelane_b32 v255, 0xaf123456, vcc_hi +// GFX11: encoding: [0xff,0x00,0x61,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] + +v_xad_u32 v5, v1, v2, s3 +// GFX11: encoding: [0x05,0x00,0x45,0xd6,0x01,0x05,0x0e,0x00] + +v_xad_u32 v5, v255, s2, s105 +// GFX11: encoding: [0x05,0x00,0x45,0xd6,0xff,0x05,0xa4,0x01] + +v_xad_u32 v5, s1, v255, exec_hi +// GFX11: encoding: [0x05,0x00,0x45,0xd6,0x01,0xfe,0xff,0x01] + +v_xad_u32 v5, s105, s105, exec_lo +// GFX11: encoding: [0x05,0x00,0x45,0xd6,0x69,0xd2,0xf8,0x01] + +v_xad_u32 v5, vcc_lo, ttmp15, v3 +// GFX11: encoding: [0x05,0x00,0x45,0xd6,0x6a,0xf6,0x0c,0x04] + +v_xad_u32 v5, vcc_hi, 0xaf123456, v255 +// GFX11: encoding: [0x05,0x00,0x45,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] + +v_xad_u32 v5, ttmp15, src_scc, ttmp15 +// GFX11: encoding: [0x05,0x00,0x45,0xd6,0x7b,0xfa,0xed,0x01] + +v_xad_u32 v5, m0, 0.5, m0 +// GFX11: encoding: [0x05,0x00,0x45,0xd6,0x7d,0xe0,0xf5,0x01] + +v_xad_u32 v5, exec_lo, -1, vcc_hi +// GFX11: encoding: [0x05,0x00,0x45,0xd6,0x7e,0x82,0xad,0x01] + +v_xad_u32 v5, exec_hi, null, vcc_lo +// GFX11: encoding: [0x05,0x00,0x45,0xd6,0x7f,0xf8,0xa8,0x01] + +v_xad_u32 v5, null, exec_lo, 0xaf123456 +// GFX11: encoding: [0x05,0x00,0x45,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf] + +v_xad_u32 v5, -1, exec_hi, src_scc +// GFX11: encoding: [0x05,0x00,0x45,0xd6,0xc1,0xfe,0xf4,0x03] + +v_xad_u32 v5, 0.5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x45,0xd6,0xf0,0xfa,0xc0,0x03] + +v_xad_u32 v5, src_scc, vcc_lo, -1 +// GFX11: encoding: [0x05,0x00,0x45,0xd6,0xfd,0xd4,0x04,0x03] + +v_xad_u32 v255, 0xaf123456, vcc_hi, null +// GFX11: encoding: [0xff,0x00,0x45,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] + +v_xor3_b32 v5, v1, v2, s3 +// GFX11: encoding: [0x05,0x00,0x40,0xd6,0x01,0x05,0x0e,0x00] + +v_xor3_b32 v5, v255, s2, s105 +// GFX11: encoding: [0x05,0x00,0x40,0xd6,0xff,0x05,0xa4,0x01] + +v_xor3_b32 v5, s1, v255, exec_hi +// GFX11: encoding: [0x05,0x00,0x40,0xd6,0x01,0xfe,0xff,0x01] + +v_xor3_b32 v5, s105, s105, exec_lo +// GFX11: encoding: [0x05,0x00,0x40,0xd6,0x69,0xd2,0xf8,0x01] + +v_xor3_b32 v5, vcc_lo, ttmp15, v3 +// GFX11: encoding: [0x05,0x00,0x40,0xd6,0x6a,0xf6,0x0c,0x04] + +v_xor3_b32 v5, vcc_hi, 0xaf123456, v255 +// GFX11: encoding: [0x05,0x00,0x40,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] + +v_xor3_b32 v5, ttmp15, src_scc, ttmp15 +// GFX11: encoding: [0x05,0x00,0x40,0xd6,0x7b,0xfa,0xed,0x01] + +v_xor3_b32 v5, m0, 0.5, m0 +// GFX11: encoding: [0x05,0x00,0x40,0xd6,0x7d,0xe0,0xf5,0x01] + +v_xor3_b32 v5, exec_lo, -1, vcc_hi +// GFX11: encoding: [0x05,0x00,0x40,0xd6,0x7e,0x82,0xad,0x01] + +v_xor3_b32 v5, exec_hi, null, vcc_lo +// GFX11: encoding: [0x05,0x00,0x40,0xd6,0x7f,0xf8,0xa8,0x01] + +v_xor3_b32 v5, null, exec_lo, 0xaf123456 +// GFX11: encoding: [0x05,0x00,0x40,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf] + +v_xor3_b32 v5, -1, exec_hi, src_scc +// GFX11: encoding: [0x05,0x00,0x40,0xd6,0xc1,0xfe,0xf4,0x03] + +v_xor3_b32 v5, 0.5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x40,0xd6,0xf0,0xfa,0xc0,0x03] + +v_xor3_b32 v5, src_scc, vcc_lo, -1 +// GFX11: encoding: [0x05,0x00,0x40,0xd6,0xfd,0xd4,0x04,0x03] + +v_xor3_b32 v255, 0xaf123456, vcc_hi, null 
+// GFX11: encoding: [0xff,0x00,0x40,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] + +v_xor_b16 v5, v1, v2 +// GFX11: encoding: [0x05,0x00,0x64,0xd7,0x01,0x05,0x02,0x00] + +v_xor_b16 v5, v255, v255 +// GFX11: encoding: [0x05,0x00,0x64,0xd7,0xff,0xff,0x03,0x00] + +v_xor_b16 v5, s1, s2 +// GFX11: encoding: [0x05,0x00,0x64,0xd7,0x01,0x04,0x00,0x00] + +v_xor_b16 v5, s105, s105 +// GFX11: encoding: [0x05,0x00,0x64,0xd7,0x69,0xd2,0x00,0x00] + +v_xor_b16 v5, vcc_lo, ttmp15 +// GFX11: encoding: [0x05,0x00,0x64,0xd7,0x6a,0xf6,0x00,0x00] + +v_xor_b16 v5, vcc_hi, 0xfe0b +// GFX11: encoding: [0x05,0x00,0x64,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] + +v_xor_b16 v5, ttmp15, src_scc +// GFX11: encoding: [0x05,0x00,0x64,0xd7,0x7b,0xfa,0x01,0x00] + +v_xor_b16 v5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x64,0xd7,0x7d,0xe0,0x01,0x00] + +v_xor_b16 v5, exec_lo, -1 +// GFX11: encoding: [0x05,0x00,0x64,0xd7,0x7e,0x82,0x01,0x00] + +v_xor_b16 v5, exec_hi, null +// GFX11: encoding: [0x05,0x00,0x64,0xd7,0x7f,0xf8,0x00,0x00] + +v_xor_b16 v5, null, exec_lo +// GFX11: encoding: [0x05,0x00,0x64,0xd7,0x7c,0xfc,0x00,0x00] + +v_xor_b16 v5, -1, exec_hi +// GFX11: encoding: [0x05,0x00,0x64,0xd7,0xc1,0xfe,0x00,0x00] + +v_xor_b16 v5, 0.5, m0 +// GFX11: encoding: [0x05,0x00,0x64,0xd7,0xf0,0xfa,0x00,0x00] + +v_xor_b16 v5, src_scc, vcc_lo +// GFX11: encoding: [0x05,0x00,0x64,0xd7,0xfd,0xd4,0x00,0x00] + +v_xor_b16 v255, 0xfe0b, vcc_hi +// GFX11: encoding: [0xff,0x00,0x64,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop3.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop3.s index e025ab73933eb..dadb515630b66 100644 --- a/llvm/test/MC/AMDGPU/gfx11_asm_vop3.s +++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop3.s @@ -1,7 +1,7 @@ -// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32 -show-encoding %s | FileCheck --check-prefixes=GFX11,W32 %s -// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64 -show-encoding %s | FileCheck --check-prefixes=GFX11,W64 %s -// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32 %s 2>&1 | FileCheck --check-prefix=W32-ERR --implicit-check-not=error: %s -// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64 %s 2>&1 | FileCheck --check-prefix=W64-ERR --implicit-check-not=error: %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,+real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX11,W32 %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,+real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX11,W64 %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,+real-true16 %s 2>&1 | FileCheck --check-prefix=W32-ERR --implicit-check-not=error: %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,+real-true16 %s 2>&1 | FileCheck --check-prefix=W64-ERR --implicit-check-not=error: %s v_add3_u32 v5, v1, v2, s3 // GFX11: encoding: [0x05,0x00,0x55,0xd6,0x01,0x05,0x0e,0x00] diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16-fake16.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16-fake16.s new file mode 100644 index 0000000000000..2371dbc8c1b8f --- /dev/null +++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16-fake16.s @@ -0,0 +1,4695 @@ +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,-real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX11,W32 %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,-real-true16 -show-encoding %s | FileCheck 
--check-prefixes=GFX11,W64 %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,-real-true16 %s 2>&1 | FileCheck --check-prefixes=GFX11-ERR,W32-ERR --implicit-check-not=error: %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,-real-true16 %s 2>&1 | FileCheck --check-prefixes=GFX11-ERR,W64-ERR --implicit-check-not=error: %s + +v_add3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0x55,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_add3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0x55,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_add3_u32_e64_dpp v5, v1, v2, v3 row_mirror +// GFX11: [0x05,0x00,0x55,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_add3_u32_e64_dpp v5, v1, v2, v255 row_half_mirror +// GFX11: [0x05,0x00,0x55,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] + +v_add3_u32_e64_dpp v5, v1, v2, s105 row_shl:1 +// GFX11: [0x05,0x00,0x55,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] + +v_add3_u32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 +// GFX11: [0x05,0x00,0x55,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] + +v_add3_u32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 +// GFX11: [0x05,0x00,0x55,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] + +v_add3_u32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 +// GFX11: [0x05,0x00,0x55,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] + +v_add3_u32_e64_dpp v5, v1, v2, exec_hi row_ror:1 +// GFX11: [0x05,0x00,0x55,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] + +v_add3_u32_e64_dpp v5, v1, v2, exec_lo row_ror:15 +// GFX11: [0x05,0x00,0x55,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] + +v_add3_u32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0x55,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] + +v_add3_u32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0x55,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] + +v_add3_u32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0x55,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13] + +v_add3_u32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x00,0x55,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] + +v_add_co_u32_e64_dpp v5, s6, v1, v2 quad_perm:[3,2,1,0] +// W32: [0x05,0x06,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_u32_e64_dpp v5, s6, v1, v2 quad_perm:[0,1,2,3] +// W32: [0x05,0x06,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_u32_e64_dpp v5, s6, v1, v2 row_mirror +// W32: [0x05,0x06,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_u32_e64_dpp v5, s6, v1, v2 row_half_mirror +// W32: [0x05,0x06,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_u32_e64_dpp v5, s6, v1, v2 row_shl:1 +// W32: [0x05,0x06,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_u32_e64_dpp v5, s6, v1, v2 row_shl:15 +// W32: [0x05,0x06,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_u32_e64_dpp v5, s6, v1, v2 row_shr:1 +// W32: 
[0x05,0x06,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_u32_e64_dpp v5, s6, v1, v2 row_shr:15 +// W32: [0x05,0x06,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_u32_e64_dpp v5, s6, v1, v2 row_ror:1 +// W32: [0x05,0x06,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_u32_e64_dpp v5, s105, v1, v2 row_ror:15 +// W32: [0x05,0x69,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_u32_e64_dpp v5, vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W32: [0x05,0x6a,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_u32_e64_dpp v5, vcc_hi, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W32: [0x05,0x6b,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_u32_e64_dpp v5, ttmp15, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W32: [0x05,0x7b,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_u32_e64_dpp v5, s[12:13], v1, v2 quad_perm:[3,2,1,0] +// W64: [0x05,0x0c,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_u32_e64_dpp v5, s[12:13], v1, v2 quad_perm:[0,1,2,3] +// W64: [0x05,0x0c,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_u32_e64_dpp v5, s[12:13], v1, v2 row_mirror +// W64: [0x05,0x0c,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_u32_e64_dpp v5, s[12:13], v1, v2 row_half_mirror +// W64: [0x05,0x0c,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_u32_e64_dpp v5, s[12:13], v1, v2 row_shl:1 +// W64: [0x05,0x0c,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_u32_e64_dpp v5, s[12:13], v1, v2 row_shl:15 +// W64: [0x05,0x0c,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_u32_e64_dpp v5, s[12:13], v1, v2 row_shr:1 +// W64: [0x05,0x0c,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_u32_e64_dpp v5, s[12:13], v1, v2 row_shr:15 +// W64: [0x05,0x0c,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_u32_e64_dpp v5, s[12:13], v1, v2 row_ror:1 +// W64: [0x05,0x0c,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_u32_e64_dpp v5, s[12:13], v1, v2 row_ror:15 +// W64: [0x05,0x0c,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_u32_e64_dpp v5, s[104:105], v1, v2 
row_share:0 row_mask:0xf bank_mask:0xf +// W64: [0x05,0x68,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_u32_e64_dpp v5, vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W64: [0x05,0x6a,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_u32_e64_dpp v5, ttmp[14:15], v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W64: [0x05,0x7a,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_u32_e64_dpp v255, null, v255, v255 clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0xfc,0x00,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] + +v_add_lshl_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0x47,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_add_lshl_u32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0x47,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_add_lshl_u32_e64_dpp v5, v1, v2, v3 row_mirror +// GFX11: [0x05,0x00,0x47,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_add_lshl_u32_e64_dpp v5, v1, v2, v255 row_half_mirror +// GFX11: [0x05,0x00,0x47,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] + +v_add_lshl_u32_e64_dpp v5, v1, v2, s105 row_shl:1 +// GFX11: [0x05,0x00,0x47,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] + +v_add_lshl_u32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 +// GFX11: [0x05,0x00,0x47,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] + +v_add_lshl_u32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 +// GFX11: [0x05,0x00,0x47,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] + +v_add_lshl_u32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 +// GFX11: [0x05,0x00,0x47,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] + +v_add_lshl_u32_e64_dpp v5, v1, v2, exec_hi row_ror:1 +// GFX11: [0x05,0x00,0x47,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] + +v_add_lshl_u32_e64_dpp v5, v1, v2, exec_lo row_ror:15 +// GFX11: [0x05,0x00,0x47,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] + +v_add_lshl_u32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0x47,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] + +v_add_lshl_u32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0x47,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] + +v_add_lshl_u32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0x47,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13] + +v_add_lshl_u32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x00,0x47,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] + +v_add_nc_i16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_add_nc_i16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_add_nc_i16_e64_dpp v5, v1, v2 row_mirror +// GFX11: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_add_nc_i16_e64_dpp v5, v1, v2 row_half_mirror +// GFX11: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_add_nc_i16_e64_dpp v5, v1, v2 row_shl:1 +// GFX11: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_add_nc_i16_e64_dpp v5, v1, v2 row_shl:15 +// GFX11: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + 
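+// Worked decoding of one entry above (read off the bytes in this file, not
+// quoted from a spec):
+//   v_add3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0]
+//   [0x05,0x00,0x55,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
+// The first eight bytes are the VOP3 word; its packed 9-bit source fields
+// hold src0 = 0xfa (the DPP16 marker), src1 = 0x102 (v2) and src2 = 0x103
+// (v3), while the real first source moves into byte 8 of the trailing DPP
+// word as 0x01 (v1). Byte 9 carries the permute control: quad_perm:[3,2,1,0]
+// packs as 3 + (2<<2) + (1<<4) + (0<<6) = 0x1b, and [0,1,2,3] as 0xe4. The
+// final 0xff is row_mask:0xf in the high nibble, bank_mask:0xf in the low.
+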
+v_add_nc_i16_e64_dpp v5, v1, v2 row_shr:1 +// GFX11: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_add_nc_i16_e64_dpp v5, v1, v2 row_shr:15 +// GFX11: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_add_nc_i16_e64_dpp v5, v1, v2 row_ror:1 +// GFX11: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_add_nc_i16_e64_dpp v5, v1, v2 row_ror:15 +// GFX11: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_add_nc_i16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_add_nc_i16_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] + +v_add_nc_i16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] + +v_add_nc_i16_e64_dpp v255, v255, v255 clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x80,0x0d,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] + +v_add_nc_i32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0x26,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_add_nc_i32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0x26,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_add_nc_i32_e64_dpp v5, v1, v2 row_mirror +// GFX11: [0x05,0x00,0x26,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_add_nc_i32_e64_dpp v5, v1, v2 row_half_mirror +// GFX11: [0x05,0x00,0x26,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_add_nc_i32_e64_dpp v5, v1, v2 row_shl:1 +// GFX11: [0x05,0x00,0x26,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_add_nc_i32_e64_dpp v5, v1, v2 row_shl:15 +// GFX11: [0x05,0x00,0x26,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_add_nc_i32_e64_dpp v5, v1, v2 row_shr:1 +// GFX11: [0x05,0x00,0x26,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_add_nc_i32_e64_dpp v5, v1, v2 row_shr:15 +// GFX11: [0x05,0x00,0x26,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_add_nc_i32_e64_dpp v5, v1, v2 row_ror:1 +// GFX11: [0x05,0x00,0x26,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_add_nc_i32_e64_dpp v5, v1, v2 row_ror:15 +// GFX11: [0x05,0x00,0x26,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_add_nc_i32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0x26,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_add_nc_i32_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0x26,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] + +v_add_nc_i32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0x26,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] + +v_add_nc_i32_e64_dpp v255, v255, v255 clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x80,0x26,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] + +v_add_nc_u16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_add_nc_u16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_add_nc_u16_e64_dpp v5, v1, v2 row_mirror +// GFX11: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_add_nc_u16_e64_dpp v5, v1, v2 row_half_mirror +// GFX11: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_add_nc_u16_e64_dpp v5, v1, v2 row_shl:1 +// GFX11: 
[0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_add_nc_u16_e64_dpp v5, v1, v2 row_shl:15 +// GFX11: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_add_nc_u16_e64_dpp v5, v1, v2 row_shr:1 +// GFX11: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_add_nc_u16_e64_dpp v5, v1, v2 row_shr:15 +// GFX11: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_add_nc_u16_e64_dpp v5, v1, v2 row_ror:1 +// GFX11: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_add_nc_u16_e64_dpp v5, v1, v2 row_ror:15 +// GFX11: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_add_nc_u16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_add_nc_u16_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] + +v_add_nc_u16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] + +v_add_nc_u16_e64_dpp v255, v255, v255 clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x80,0x03,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] + +v_alignbit_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0x16,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_alignbit_b32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0x16,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_alignbit_b32_e64_dpp v5, v1, v2, v3 row_mirror +// GFX11: [0x05,0x00,0x16,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_alignbit_b32_e64_dpp v5, v1, v2, v3 row_half_mirror +// GFX11: [0x05,0x00,0x16,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] + +v_alignbit_b32_e64_dpp v5, v1, v2, v255 row_shl:1 +// GFX11: [0x05,0x00,0x16,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x01,0x01,0xff] + +v_alignbit_b32_e64_dpp v5, v1, v2, s105 row_shl:15 +// GFX11: [0x05,0x00,0x16,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x0f,0x01,0xff] + +v_alignbit_b32_e64_dpp v5, v1, v2, vcc_hi row_shr:1 +// GFX11: [0x05,0x00,0x16,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff] + +v_alignbit_b32_e64_dpp v5, v1, v2, vcc_lo row_shr:15 +// GFX11: [0x05,0x00,0x16,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff] + +v_alignbit_b32_e64_dpp v5, v1, v2, ttmp15 row_ror:1 +// GFX11: [0x05,0x00,0x16,0xd6,0xfa,0x04,0xee,0x01,0x01,0x21,0x01,0xff] + +v_alignbit_b32_e64_dpp v5, v1, v2, exec_hi row_ror:15 +// GFX11: [0x05,0x00,0x16,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] + +v_alignbit_b32_e64_dpp v5, v1, v2, exec_lo row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0x16,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] + +v_alignbit_b32_e64_dpp v5, v1, v2, null row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0x16,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] + +v_alignbit_b32_e64_dpp v5, v1, v2, -1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0x16,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x09,0x13] + +v_alignbit_b32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x00,0x16,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] + +v_alignbyte_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0x17,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_alignbyte_b32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0x17,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_alignbyte_b32_e64_dpp v5, 
v1, v2, v3 row_mirror +// GFX11: [0x05,0x00,0x17,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_alignbyte_b32_e64_dpp v5, v1, v2, v3 row_half_mirror +// GFX11: [0x05,0x00,0x17,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] + +v_alignbyte_b32_e64_dpp v5, v1, v2, v255 row_shl:1 +// GFX11: [0x05,0x00,0x17,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x01,0x01,0xff] + +v_alignbyte_b32_e64_dpp v5, v1, v2, s105 row_shl:15 +// GFX11: [0x05,0x00,0x17,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x0f,0x01,0xff] + +v_alignbyte_b32_e64_dpp v5, v1, v2, vcc_hi row_shr:1 +// GFX11: [0x05,0x00,0x17,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff] + +v_alignbyte_b32_e64_dpp v5, v1, v2, vcc_lo row_shr:15 +// GFX11: [0x05,0x00,0x17,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff] + +v_alignbyte_b32_e64_dpp v5, v1, v2, ttmp15 row_ror:1 +// GFX11: [0x05,0x00,0x17,0xd6,0xfa,0x04,0xee,0x01,0x01,0x21,0x01,0xff] + +v_alignbyte_b32_e64_dpp v5, v1, v2, exec_hi row_ror:15 +// GFX11: [0x05,0x00,0x17,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] + +v_alignbyte_b32_e64_dpp v5, v1, v2, exec_lo row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0x17,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] + +v_alignbyte_b32_e64_dpp v5, v1, v2, null row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0x17,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] + +v_alignbyte_b32_e64_dpp v5, v1, v2, -1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0x17,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x09,0x13] + +v_alignbyte_b32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x00,0x17,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] + +v_and_b16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_and_b16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_and_b16_e64_dpp v5, v1, v2 row_mirror +// GFX11: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_and_b16_e64_dpp v5, v1, v2 row_half_mirror +// GFX11: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_and_b16_e64_dpp v5, v1, v2 row_shl:1 +// GFX11: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_and_b16_e64_dpp v5, v1, v2 row_shl:15 +// GFX11: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_and_b16_e64_dpp v5, v1, v2 row_shr:1 +// GFX11: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_and_b16_e64_dpp v5, v1, v2 row_shr:15 +// GFX11: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_and_b16_e64_dpp v5, v1, v2 row_ror:1 +// GFX11: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_and_b16_e64_dpp v5, v1, v2 row_ror:15 +// GFX11: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_and_b16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_and_b16_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] + +v_and_b16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] + +v_and_b16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x00,0x62,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] + +v_and_or_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX11: 
[0x05,0x00,0x57,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_and_or_b32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0x57,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_and_or_b32_e64_dpp v5, v1, v2, v3 row_mirror +// GFX11: [0x05,0x00,0x57,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_and_or_b32_e64_dpp v5, v1, v2, v255 row_half_mirror +// GFX11: [0x05,0x00,0x57,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] + +v_and_or_b32_e64_dpp v5, v1, v2, s105 row_shl:1 +// GFX11: [0x05,0x00,0x57,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] + +v_and_or_b32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 +// GFX11: [0x05,0x00,0x57,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] + +v_and_or_b32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 +// GFX11: [0x05,0x00,0x57,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] + +v_and_or_b32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 +// GFX11: [0x05,0x00,0x57,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] + +v_and_or_b32_e64_dpp v5, v1, v2, exec_hi row_ror:1 +// GFX11: [0x05,0x00,0x57,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] + +v_and_or_b32_e64_dpp v5, v1, v2, exec_lo row_ror:15 +// GFX11: [0x05,0x00,0x57,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] + +v_and_or_b32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0x57,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] + +v_and_or_b32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0x57,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] + +v_and_or_b32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0x57,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13] + +v_and_or_b32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x00,0x57,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] + +v_ashrrev_i16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_ashrrev_i16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_ashrrev_i16_e64_dpp v5, v1, v2 row_mirror +// GFX11: [0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_ashrrev_i16_e64_dpp v5, v1, v2 row_half_mirror +// GFX11: [0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_ashrrev_i16_e64_dpp v5, v1, v2 row_shl:1 +// GFX11: [0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_ashrrev_i16_e64_dpp v5, v1, v2 row_shl:15 +// GFX11: [0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_ashrrev_i16_e64_dpp v5, v1, v2 row_shr:1 +// GFX11: [0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_ashrrev_i16_e64_dpp v5, v1, v2 row_shr:15 +// GFX11: [0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_ashrrev_i16_e64_dpp v5, v1, v2 row_ror:1 +// GFX11: [0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_ashrrev_i16_e64_dpp v5, v1, v2 row_ror:15 +// GFX11: [0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_ashrrev_i16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_ashrrev_i16_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] + +v_ashrrev_i16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] + 
+v_ashrrev_i16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x00,0x3a,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] + +v_bcnt_u32_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0x1e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_bcnt_u32_b32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0x1e,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_bcnt_u32_b32_e64_dpp v5, v1, v2 row_mirror +// GFX11: [0x05,0x00,0x1e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_bcnt_u32_b32_e64_dpp v5, v1, v2 row_half_mirror +// GFX11: [0x05,0x00,0x1e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_bcnt_u32_b32_e64_dpp v5, v1, v2 row_shl:1 +// GFX11: [0x05,0x00,0x1e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_bcnt_u32_b32_e64_dpp v5, v1, v2 row_shl:15 +// GFX11: [0x05,0x00,0x1e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_bcnt_u32_b32_e64_dpp v5, v1, v2 row_shr:1 +// GFX11: [0x05,0x00,0x1e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_bcnt_u32_b32_e64_dpp v5, v1, v2 row_shr:15 +// GFX11: [0x05,0x00,0x1e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_bcnt_u32_b32_e64_dpp v5, v1, v2 row_ror:1 +// GFX11: [0x05,0x00,0x1e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_bcnt_u32_b32_e64_dpp v5, v1, v2 row_ror:15 +// GFX11: [0x05,0x00,0x1e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_bcnt_u32_b32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0x1e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_bcnt_u32_b32_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0x1e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] + +v_bcnt_u32_b32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0x1e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] + +v_bcnt_u32_b32_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x00,0x1e,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] + +v_bfe_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0x11,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_bfe_i32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0x11,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_bfe_i32_e64_dpp v5, v1, v2, v3 row_mirror +// GFX11: [0x05,0x00,0x11,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_bfe_i32_e64_dpp v5, v1, v2, v255 row_half_mirror +// GFX11: [0x05,0x00,0x11,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] + +v_bfe_i32_e64_dpp v5, v1, v2, s105 row_shl:1 +// GFX11: [0x05,0x00,0x11,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] + +v_bfe_i32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 +// GFX11: [0x05,0x00,0x11,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] + +v_bfe_i32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 +// GFX11: [0x05,0x00,0x11,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] + +v_bfe_i32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 +// GFX11: [0x05,0x00,0x11,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] + +v_bfe_i32_e64_dpp v5, v1, v2, exec_hi row_ror:1 +// GFX11: [0x05,0x00,0x11,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] + +v_bfe_i32_e64_dpp v5, v1, v2, exec_lo row_ror:15 +// GFX11: [0x05,0x00,0x11,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] + +v_bfe_i32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0x11,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] + +v_bfe_i32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: 
[0x05,0x00,0x11,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] + +v_bfe_i32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0x11,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13] + +v_bfe_i32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x00,0x11,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] + +v_bfe_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0x10,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_bfe_u32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0x10,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_bfe_u32_e64_dpp v5, v1, v2, v3 row_mirror +// GFX11: [0x05,0x00,0x10,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_bfe_u32_e64_dpp v5, v1, v2, v255 row_half_mirror +// GFX11: [0x05,0x00,0x10,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] + +v_bfe_u32_e64_dpp v5, v1, v2, s105 row_shl:1 +// GFX11: [0x05,0x00,0x10,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] + +v_bfe_u32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 +// GFX11: [0x05,0x00,0x10,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] + +v_bfe_u32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 +// GFX11: [0x05,0x00,0x10,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] + +v_bfe_u32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 +// GFX11: [0x05,0x00,0x10,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] + +v_bfe_u32_e64_dpp v5, v1, v2, exec_hi row_ror:1 +// GFX11: [0x05,0x00,0x10,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] + +v_bfe_u32_e64_dpp v5, v1, v2, exec_lo row_ror:15 +// GFX11: [0x05,0x00,0x10,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] + +v_bfe_u32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0x10,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] + +v_bfe_u32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0x10,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] + +v_bfe_u32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0x10,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13] + +v_bfe_u32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x00,0x10,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] + +v_bfi_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0x12,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_bfi_b32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0x12,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_bfi_b32_e64_dpp v5, v1, v2, v3 row_mirror +// GFX11: [0x05,0x00,0x12,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_bfi_b32_e64_dpp v5, v1, v2, v255 row_half_mirror +// GFX11: [0x05,0x00,0x12,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] + +v_bfi_b32_e64_dpp v5, v1, v2, s105 row_shl:1 +// GFX11: [0x05,0x00,0x12,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] + +v_bfi_b32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 +// GFX11: [0x05,0x00,0x12,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] + +v_bfi_b32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 +// GFX11: [0x05,0x00,0x12,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] + +v_bfi_b32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 +// GFX11: [0x05,0x00,0x12,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] + +v_bfi_b32_e64_dpp v5, v1, v2, exec_hi row_ror:1 +// GFX11: [0x05,0x00,0x12,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] + +v_bfi_b32_e64_dpp v5, v1, v2, exec_lo row_ror:15 +// GFX11: [0x05,0x00,0x12,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] + 
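+// The DPP controls repeated through this file decode uniformly (inferred
+// from the byte pairs themselves, so treat the map as an observation, not
+// a spec quote): byte 9 plus bit 0 of byte 10 form a 9-bit control, with
+// quad_perm in 0x000-0x0ff, row_shl:N at 0x100+N, row_shr:N at 0x110+N,
+// row_ror:N at 0x120+N, row_mirror 0x140, row_half_mirror 0x141,
+// row_share:N 0x150+N and row_xmask:N 0x160+N. In byte 10, bit 2 (0x04)
+// tracks fi:1 and bit 3 (0x08) tracks bound_ctrl:1, which is why the
+// "bound_ctrl:1 fi:0" rows end their control pair in 0x09 and the
+// "bound_ctrl:0 fi:1" rows end it in 0x05.
+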
+v_bfi_b32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0x12,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] + +v_bfi_b32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0x12,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] + +v_bfi_b32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0x12,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13] + +v_bfi_b32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x00,0x12,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] + +v_bfm_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0x1d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_bfm_b32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0x1d,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_bfm_b32_e64_dpp v5, v1, v2 row_mirror +// GFX11: [0x05,0x00,0x1d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_bfm_b32_e64_dpp v5, v1, v2 row_half_mirror +// GFX11: [0x05,0x00,0x1d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_bfm_b32_e64_dpp v5, v1, v2 row_shl:1 +// GFX11: [0x05,0x00,0x1d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_bfm_b32_e64_dpp v5, v1, v2 row_shl:15 +// GFX11: [0x05,0x00,0x1d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_bfm_b32_e64_dpp v5, v1, v2 row_shr:1 +// GFX11: [0x05,0x00,0x1d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_bfm_b32_e64_dpp v5, v1, v2 row_shr:15 +// GFX11: [0x05,0x00,0x1d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_bfm_b32_e64_dpp v5, v1, v2 row_ror:1 +// GFX11: [0x05,0x00,0x1d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_bfm_b32_e64_dpp v5, v1, v2 row_ror:15 +// GFX11: [0x05,0x00,0x1d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_bfm_b32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0x1d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_bfm_b32_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0x1d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] + +v_bfm_b32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0x1d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] + +v_bfm_b32_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x00,0x1d,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] + +v_cndmask_b16_e64_dpp v5, v1, v2, s3 quad_perm:[3,2,1,0] +// W32: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b16_e64_dpp v5, v1, v2, s3 quad_perm:[0,1,2,3] +// W32: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x0e,0x00,0x01,0xe4,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b16_e64_dpp v5, v1, v2, s3 row_mirror +// W32: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b16_e64_dpp v5, v1, v2, s3 row_half_mirror +// W32: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x41,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b16_e64_dpp v5, v1, v2, s3 row_shl:1 +// W32: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x01,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b16_e64_dpp v5, v1, v2, s3 row_shl:15 +// W32: 
[0x05,0x00,0x5d,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x0f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b16_e64_dpp v5, v1, v2, s3 row_shr:1 +// W32: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x11,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b16_e64_dpp v5, v1, v2, s3 row_shr:15 +// W32: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x1f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b16_e64_dpp v5, v1, v2, s3 row_ror:1 +// W32: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x21,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b16_e64_dpp v5, v1, v2, s105 row_ror:15 +// W32: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x2f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b16_e64_dpp v5, v1, v2, vcc_hi row_share:0 row_mask:0xf bank_mask:0xf +// W32: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0xae,0x01,0x01,0x50,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b16_e64_dpp v5, |v1|, -v2, vcc_lo row_share:15 row_mask:0x0 bank_mask:0x1 +// W32: [0x05,0x01,0x5d,0xd6,0xfa,0x04,0xaa,0x41,0x01,0x5f,0x01,0x01] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b16_e64_dpp v5, -v1, |v2|, ttmp15 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W32: [0x05,0x02,0x5d,0xd6,0xfa,0x04,0xee,0x21,0x01,0x60,0x09,0x13] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] quad_perm:[3,2,1,0] +// W64: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] quad_perm:[0,1,2,3] +// W64: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0xe4,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] row_mirror +// W64: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x40,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] row_half_mirror +// W64: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] row_shl:1 +// W64: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x01,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] row_shl:15 +// W64: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x0f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] row_shr:1 +// W64: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x11,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] row_shr:15 +// W64: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x1f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] row_ror:1 +// W64: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x21,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] row_ror:15 
+// W64: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x2f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b16_e64_dpp v5, v1, v2, s[104:105] row_share:0 row_mask:0xf bank_mask:0xf +// W64: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0xa2,0x01,0x01,0x50,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b16_e64_dpp v5, |v1|, -v2, vcc row_share:15 row_mask:0x0 bank_mask:0x1 +// W64: [0x05,0x01,0x5d,0xd6,0xfa,0x04,0xaa,0x41,0x01,0x5f,0x01,0x01] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b16_e64_dpp v5, -v1, |v2|, ttmp[14:15] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W64: [0x05,0x02,0x5d,0xd6,0xfa,0x04,0xea,0x21,0x01,0x60,0x09,0x13] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b16_e64_dpp v255, -|v255|, -|v255|, null row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x03,0x5d,0xd6,0xfa,0xfe,0xf3,0x61,0xff,0x6f,0x05,0x30] + +v_cubeid_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0x0c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_cubeid_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0x0c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_cubeid_f32_e64_dpp v5, v1, v2, v3 row_mirror +// GFX11: [0x05,0x00,0x0c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_cubeid_f32_e64_dpp v5, v1, v2, v255 row_half_mirror +// GFX11: [0x05,0x00,0x0c,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] + +v_cubeid_f32_e64_dpp v5, v1, v2, s105 row_shl:1 +// GFX11: [0x05,0x00,0x0c,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] + +v_cubeid_f32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 +// GFX11: [0x05,0x00,0x0c,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] + +v_cubeid_f32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 +// GFX11: [0x05,0x00,0x0c,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] + +v_cubeid_f32_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15 +// GFX11: [0x05,0x01,0x0c,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] + +v_cubeid_f32_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 +// GFX11: [0x05,0x02,0x0c,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] + +v_cubeid_f32_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 +// GFX11: [0x05,0x04,0x0c,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] + +v_cubeid_f32_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x03,0x0c,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] + +v_cubeid_f32_e64_dpp v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x05,0x0c,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01] + +v_cubeid_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x06,0x0c,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x09,0x13] + +v_cubeid_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x87,0x0c,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x05,0x30] + +v_cubema_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0x0f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_cubema_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0x0f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_cubema_f32_e64_dpp v5, v1, v2, v3 row_mirror +// GFX11: [0x05,0x00,0x0f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_cubema_f32_e64_dpp v5, v1, v2, v255 row_half_mirror +// GFX11: 
[0x05,0x00,0x0f,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] + +v_cubema_f32_e64_dpp v5, v1, v2, s105 row_shl:1 +// GFX11: [0x05,0x00,0x0f,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] + +v_cubema_f32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 +// GFX11: [0x05,0x00,0x0f,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] + +v_cubema_f32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 +// GFX11: [0x05,0x00,0x0f,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] + +v_cubema_f32_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15 +// GFX11: [0x05,0x01,0x0f,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] + +v_cubema_f32_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 +// GFX11: [0x05,0x02,0x0f,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] + +v_cubema_f32_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 +// GFX11: [0x05,0x04,0x0f,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] + +v_cubema_f32_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x03,0x0f,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] + +v_cubema_f32_e64_dpp v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x05,0x0f,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01] + +v_cubema_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x06,0x0f,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x09,0x13] + +v_cubema_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x87,0x0f,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x05,0x30] + +v_cubesc_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0x0d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_cubesc_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0x0d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_cubesc_f32_e64_dpp v5, v1, v2, v3 row_mirror +// GFX11: [0x05,0x00,0x0d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_cubesc_f32_e64_dpp v5, v1, v2, v255 row_half_mirror +// GFX11: [0x05,0x00,0x0d,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] + +v_cubesc_f32_e64_dpp v5, v1, v2, s105 row_shl:1 +// GFX11: [0x05,0x00,0x0d,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] + +v_cubesc_f32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 +// GFX11: [0x05,0x00,0x0d,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] + +v_cubesc_f32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 +// GFX11: [0x05,0x00,0x0d,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] + +v_cubesc_f32_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15 +// GFX11: [0x05,0x01,0x0d,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] + +v_cubesc_f32_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 +// GFX11: [0x05,0x02,0x0d,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] + +v_cubesc_f32_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 +// GFX11: [0x05,0x04,0x0d,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] + +v_cubesc_f32_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x03,0x0d,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] + +v_cubesc_f32_e64_dpp v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x05,0x0d,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01] + +v_cubesc_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x06,0x0d,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x09,0x13] + +v_cubesc_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x87,0x0d,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x05,0x30] + +v_cubetc_f32_e64_dpp 
v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0x0e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_cubetc_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0x0e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_cubetc_f32_e64_dpp v5, v1, v2, v3 row_mirror +// GFX11: [0x05,0x00,0x0e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_cubetc_f32_e64_dpp v5, v1, v2, v255 row_half_mirror +// GFX11: [0x05,0x00,0x0e,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] + +v_cubetc_f32_e64_dpp v5, v1, v2, s105 row_shl:1 +// GFX11: [0x05,0x00,0x0e,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] + +v_cubetc_f32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 +// GFX11: [0x05,0x00,0x0e,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] + +v_cubetc_f32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 +// GFX11: [0x05,0x00,0x0e,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] + +v_cubetc_f32_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15 +// GFX11: [0x05,0x01,0x0e,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] + +v_cubetc_f32_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 +// GFX11: [0x05,0x02,0x0e,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] + +v_cubetc_f32_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 +// GFX11: [0x05,0x04,0x0e,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] + +v_cubetc_f32_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x03,0x0e,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] + +v_cubetc_f32_e64_dpp v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x05,0x0e,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01] + +v_cubetc_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x06,0x0e,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x09,0x13] + +v_cubetc_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x87,0x0e,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x05,0x30] + +v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 row_mirror +// GFX11: [0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 row_half_mirror +// GFX11: [0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 row_shl:1 +// GFX11: [0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 row_shl:15 +// GFX11: [0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 row_shr:1 +// GFX11: [0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 row_shr:15 +// GFX11: [0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 row_ror:1 +// GFX11: [0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 row_ror:15 +// GFX11: [0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_cvt_pk_i16_f32_e64_dpp v5, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x01,0x06,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] + +v_cvt_pk_i16_f32_e64_dpp 
v5, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x02,0x06,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13] + +v_cvt_pk_i16_f32_e64_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x03,0x06,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30] + +v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 row_mirror +// GFX11: [0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 row_half_mirror +// GFX11: [0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 row_shl:1 +// GFX11: [0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 row_shl:15 +// GFX11: [0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 row_shr:1 +// GFX11: [0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 row_shr:15 +// GFX11: [0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 row_ror:1 +// GFX11: [0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 row_ror:15 +// GFX11: [0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] + +v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] + +v_cvt_pk_i16_i32_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x00,0x24,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] + +v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_mirror +// GFX11: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_half_mirror +// GFX11: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_shl:1 +// GFX11: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_shl:15 +// GFX11: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_shr:1 +// GFX11: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_shr:15 +// GFX11: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_ror:1 +// GFX11: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_ror:15 +// GFX11: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + 
+v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_cvt_pk_norm_i16_f16_e64_dpp v5, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x01,0x12,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] + +v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x02,0x12,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13] + +v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x03,0x12,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30] + +v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_mirror +// GFX11: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_half_mirror +// GFX11: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_shl:1 +// GFX11: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_shl:15 +// GFX11: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_shr:1 +// GFX11: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_shr:15 +// GFX11: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_ror:1 +// GFX11: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_ror:15 +// GFX11: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_cvt_pk_norm_u16_f16_e64_dpp v5, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x01,0x13,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] + +v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x02,0x13,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13] + +v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x03,0x13,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30] + +v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 row_mirror +// GFX11: [0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 row_half_mirror +// GFX11: [0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 row_shl:1 +// GFX11: [0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 row_shl:15 +// GFX11: [0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 row_shr:1 +// GFX11: [0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + 
+v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 row_shr:15 +// GFX11: [0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 row_ror:1 +// GFX11: [0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 row_ror:15 +// GFX11: [0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_cvt_pk_u16_f32_e64_dpp v5, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x01,0x07,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] + +v_cvt_pk_u16_f32_e64_dpp v5, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x02,0x07,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13] + +v_cvt_pk_u16_f32_e64_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x03,0x07,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30] + +v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 row_mirror +// GFX11: [0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 row_half_mirror +// GFX11: [0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 row_shl:1 +// GFX11: [0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 row_shl:15 +// GFX11: [0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 row_shr:1 +// GFX11: [0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 row_shr:15 +// GFX11: [0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 row_ror:1 +// GFX11: [0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 row_ror:15 +// GFX11: [0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] + +v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] + +v_cvt_pk_u16_u32_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x00,0x23,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] + +v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0x26,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0x26,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, v3 row_mirror +// GFX11: [0x05,0x00,0x26,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, v255 row_half_mirror +// GFX11: [0x05,0x00,0x26,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] + +v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, s105 row_shl:1 +// GFX11: 
[0x05,0x00,0x26,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] + +v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 +// GFX11: [0x05,0x00,0x26,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] + +v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 +// GFX11: [0x05,0x00,0x26,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] + +v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 +// GFX11: [0x05,0x00,0x26,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] + +v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, exec_hi row_ror:1 +// GFX11: [0x05,0x00,0x26,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] + +v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, exec_lo row_ror:15 +// GFX11: [0x05,0x00,0x26,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] + +v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0x26,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] + +v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0x26,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] + +v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0x26,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13] + +v_cvt_pk_u8_f32_e64_dpp v255, -|v255|, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x01,0x26,0xd6,0xfa,0xfe,0xf7,0x23,0xff,0x6f,0x05,0x30] + +v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_mirror +// GFX11: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_half_mirror +// GFX11: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_shl:1 +// GFX11: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_shl:15 +// GFX11: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_shr:1 +// GFX11: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_shr:15 +// GFX11: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_ror:1 +// GFX11: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_ror:15 +// GFX11: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_cvt_pk_norm_i16_f16_e64_dpp v5, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x01,0x12,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] + +v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x02,0x12,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13] + +v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x03,0x12,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30] + +v_cvt_pk_norm_i16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_pk_norm_i16_f32_e64_dpp v5, v1, v2 
quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_cvt_pk_norm_i16_f32_e64_dpp v5, v1, v2 row_mirror +// GFX11: [0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_cvt_pk_norm_i16_f32_e64_dpp v5, v1, v2 row_half_mirror +// GFX11: [0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_cvt_pk_norm_i16_f32_e64_dpp v5, v1, v2 row_shl:1 +// GFX11: [0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_cvt_pk_norm_i16_f32_e64_dpp v5, v1, v2 row_shl:15 +// GFX11: [0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_cvt_pk_norm_i16_f32_e64_dpp v5, v1, v2 row_shr:1 +// GFX11: [0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_cvt_pk_norm_i16_f32_e64_dpp v5, v1, v2 row_shr:15 +// GFX11: [0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_cvt_pk_norm_i16_f32_e64_dpp v5, v1, v2 row_ror:1 +// GFX11: [0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_cvt_pk_norm_i16_f32_e64_dpp v5, v1, v2 row_ror:15 +// GFX11: [0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_cvt_pk_norm_i16_f32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_cvt_pk_norm_i16_f32_e64_dpp v5, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x01,0x21,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] + +v_cvt_pk_norm_i16_f32_e64_dpp v5, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x02,0x21,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13] + +v_cvt_pk_norm_i16_f32_e64_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x03,0x21,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30] + +v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_mirror +// GFX11: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_half_mirror +// GFX11: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_shl:1 +// GFX11: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_shl:15 +// GFX11: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_shr:1 +// GFX11: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_shr:15 +// GFX11: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_ror:1 +// GFX11: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_ror:15 +// GFX11: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_cvt_pk_norm_u16_f16_e64_dpp v5, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x01,0x13,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] + +v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: 
[0x05,0x02,0x13,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13] + +v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x03,0x13,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30] + +v_cvt_pk_norm_u16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_pk_norm_u16_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_cvt_pk_norm_u16_f32_e64_dpp v5, v1, v2 row_mirror +// GFX11: [0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_cvt_pk_norm_u16_f32_e64_dpp v5, v1, v2 row_half_mirror +// GFX11: [0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_cvt_pk_norm_u16_f32_e64_dpp v5, v1, v2 row_shl:1 +// GFX11: [0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_cvt_pk_norm_u16_f32_e64_dpp v5, v1, v2 row_shl:15 +// GFX11: [0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_cvt_pk_norm_u16_f32_e64_dpp v5, v1, v2 row_shr:1 +// GFX11: [0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_cvt_pk_norm_u16_f32_e64_dpp v5, v1, v2 row_shr:15 +// GFX11: [0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_cvt_pk_norm_u16_f32_e64_dpp v5, v1, v2 row_ror:1 +// GFX11: [0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_cvt_pk_norm_u16_f32_e64_dpp v5, v1, v2 row_ror:15 +// GFX11: [0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_cvt_pk_norm_u16_f32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_cvt_pk_norm_u16_f32_e64_dpp v5, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x01,0x22,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] + +v_cvt_pk_norm_u16_f32_e64_dpp v5, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x02,0x22,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13] + +v_cvt_pk_norm_u16_f32_e64_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x03,0x22,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30] + +v_div_fixup_f16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0x54,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_div_fixup_f16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0x54,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_div_fixup_f16_e64_dpp v5, v1, v2, v3 row_mirror +// GFX11: [0x05,0x00,0x54,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_div_fixup_f16_e64_dpp v5, v1, v2, v255 row_half_mirror +// GFX11: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] + +v_div_fixup_f16_e64_dpp v5, v1, v2, s105 row_shl:1 +// GFX11: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] + +v_div_fixup_f16_e64_dpp v5, v1, v2, vcc_hi row_shl:15 +// GFX11: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] + +v_div_fixup_f16_e64_dpp v5, v1, v2, vcc_lo row_shr:1 +// GFX11: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] + +v_div_fixup_f16_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15 +// GFX11: [0x05,0x01,0x54,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] + +v_div_fixup_f16_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 +// GFX11: [0x05,0x02,0x54,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] + +v_div_fixup_f16_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 +// GFX11: [0x05,0x04,0x54,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] + 
+v_div_fixup_f16_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x03,0x54,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] + +v_div_fixup_f16_e64_dpp v5, -|v1|, v2, -|-1| row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x05,0x54,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01] + +v_div_fixup_f16_e64_dpp v5, v1, -|v2|, -|0.5| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x06,0x54,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x09,0x13] + +v_div_fixup_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x87,0x54,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x05,0x30] + +v_fma_f16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0x48,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_fma_f16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0x48,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_fma_f16_e64_dpp v5, v1, v2, v3 row_mirror +// GFX11: [0x05,0x00,0x48,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_fma_f16_e64_dpp v5, v1, v2, v255 row_half_mirror +// GFX11: [0x05,0x00,0x48,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] + +v_fma_f16_e64_dpp v5, v1, v2, s105 row_shl:1 +// GFX11: [0x05,0x00,0x48,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] + +v_fma_f16_e64_dpp v5, v1, v2, vcc_hi row_shl:15 +// GFX11: [0x05,0x00,0x48,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] + +v_fma_f16_e64_dpp v5, v1, v2, vcc_lo row_shr:1 +// GFX11: [0x05,0x00,0x48,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] + +v_fma_f16_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15 +// GFX11: [0x05,0x01,0x48,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] + +v_fma_f16_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 +// GFX11: [0x05,0x02,0x48,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] + +v_fma_f16_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 +// GFX11: [0x05,0x04,0x48,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] + +v_fma_f16_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x03,0x48,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] + +v_fma_f16_e64_dpp v5, -|v1|, v2, -|-1| row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x05,0x48,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01] + +v_fma_f16_e64_dpp v5, v1, -|v2|, -|0.5| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x06,0x48,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x09,0x13] + +v_fma_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x87,0x48,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x05,0x30] + +v_fma_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0x13,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_fma_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0x13,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_fma_f32_e64_dpp v5, v1, v2, v3 row_mirror +// GFX11: [0x05,0x00,0x13,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_fma_f32_e64_dpp v5, v1, v2, v255 row_half_mirror +// GFX11: [0x05,0x00,0x13,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] + +v_fma_f32_e64_dpp v5, v1, v2, s105 row_shl:1 +// GFX11: [0x05,0x00,0x13,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] + +v_fma_f32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 +// GFX11: [0x05,0x00,0x13,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] + +v_fma_f32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 +// GFX11: [0x05,0x00,0x13,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] + +v_fma_f32_e64_dpp v5, |v1|, v2, -ttmp15 
row_shr:15 +// GFX11: [0x05,0x01,0x13,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] + +v_fma_f32_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 +// GFX11: [0x05,0x02,0x13,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] + +v_fma_f32_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 +// GFX11: [0x05,0x04,0x13,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] + +v_fma_f32_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x03,0x13,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] + +v_fma_f32_e64_dpp v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x05,0x13,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01] + +v_fma_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x06,0x13,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x09,0x13] + +v_fma_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x87,0x13,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x05,0x30] + +v_ldexp_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0x1c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_ldexp_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0x1c,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_ldexp_f32_e64_dpp v5, v1, v2 row_mirror +// GFX11: [0x05,0x00,0x1c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_ldexp_f32_e64_dpp v5, v1, v2 row_half_mirror +// GFX11: [0x05,0x00,0x1c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_ldexp_f32_e64_dpp v5, v1, v2 row_shl:1 +// GFX11: [0x05,0x00,0x1c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_ldexp_f32_e64_dpp v5, v1, v2 row_shl:15 +// GFX11: [0x05,0x00,0x1c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_ldexp_f32_e64_dpp v5, v1, v2 row_shr:1 +// GFX11: [0x05,0x00,0x1c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_ldexp_f32_e64_dpp v5, v1, v2 row_shr:15 +// GFX11: [0x05,0x00,0x1c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_ldexp_f32_e64_dpp v5, v1, v2 row_ror:1 +// GFX11: [0x05,0x00,0x1c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_ldexp_f32_e64_dpp v5, v1, v2 row_ror:15 +// GFX11: [0x05,0x00,0x1c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_ldexp_f32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0x1c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_ldexp_f32_e64_dpp v5, v1, v2 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0x1c,0xd7,0xfa,0x04,0x02,0x08,0x01,0x5f,0x01,0x01] + +v_ldexp_f32_e64_dpp v5, v1, v2 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0x1c,0xd7,0xfa,0x04,0x02,0x10,0x01,0x60,0x09,0x13] + +v_ldexp_f32_e64_dpp v255, -|v255|, v255 clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x81,0x1c,0xd7,0xfa,0xfe,0x03,0x38,0xff,0x6f,0x05,0x30] + +v_lerp_u8_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0x15,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_lerp_u8_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0x15,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_lerp_u8_e64_dpp v5, v1, v2, v3 row_mirror +// GFX11: [0x05,0x00,0x15,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_lerp_u8_e64_dpp v5, v1, v2, v255 row_half_mirror +// GFX11: [0x05,0x00,0x15,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] + +v_lerp_u8_e64_dpp v5, v1, v2, s105 row_shl:1 +// GFX11: [0x05,0x00,0x15,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] + +v_lerp_u8_e64_dpp v5, v1, v2, vcc_hi 
row_shl:15 +// GFX11: [0x05,0x00,0x15,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] + +v_lerp_u8_e64_dpp v5, v1, v2, vcc_lo row_shr:1 +// GFX11: [0x05,0x00,0x15,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] + +v_lerp_u8_e64_dpp v5, v1, v2, ttmp15 row_shr:15 +// GFX11: [0x05,0x00,0x15,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] + +v_lerp_u8_e64_dpp v5, v1, v2, exec_hi row_ror:1 +// GFX11: [0x05,0x00,0x15,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] + +v_lerp_u8_e64_dpp v5, v1, v2, exec_lo row_ror:15 +// GFX11: [0x05,0x00,0x15,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] + +v_lerp_u8_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0x15,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] + +v_lerp_u8_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0x15,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] + +v_lerp_u8_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0x15,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13] + +v_lerp_u8_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x00,0x15,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] + +v_lshl_add_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0x46,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_lshl_add_u32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0x46,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_lshl_add_u32_e64_dpp v5, v1, v2, v3 row_mirror +// GFX11: [0x05,0x00,0x46,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_lshl_add_u32_e64_dpp v5, v1, v2, v255 row_half_mirror +// GFX11: [0x05,0x00,0x46,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] + +v_lshl_add_u32_e64_dpp v5, v1, v2, s105 row_shl:1 +// GFX11: [0x05,0x00,0x46,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] + +v_lshl_add_u32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 +// GFX11: [0x05,0x00,0x46,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] + +v_lshl_add_u32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 +// GFX11: [0x05,0x00,0x46,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] + +v_lshl_add_u32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 +// GFX11: [0x05,0x00,0x46,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] + +v_lshl_add_u32_e64_dpp v5, v1, v2, exec_hi row_ror:1 +// GFX11: [0x05,0x00,0x46,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] + +v_lshl_add_u32_e64_dpp v5, v1, v2, exec_lo row_ror:15 +// GFX11: [0x05,0x00,0x46,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] + +v_lshl_add_u32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0x46,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] + +v_lshl_add_u32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0x46,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] + +v_lshl_add_u32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0x46,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13] + +v_lshl_add_u32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x00,0x46,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] + +v_lshl_or_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0x56,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_lshl_or_b32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0x56,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_lshl_or_b32_e64_dpp v5, v1, v2, v3 row_mirror +// GFX11: 
[0x05,0x00,0x56,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_lshl_or_b32_e64_dpp v5, v1, v2, v255 row_half_mirror +// GFX11: [0x05,0x00,0x56,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] + +v_lshl_or_b32_e64_dpp v5, v1, v2, s105 row_shl:1 +// GFX11: [0x05,0x00,0x56,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] + +v_lshl_or_b32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 +// GFX11: [0x05,0x00,0x56,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] + +v_lshl_or_b32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 +// GFX11: [0x05,0x00,0x56,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] + +v_lshl_or_b32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 +// GFX11: [0x05,0x00,0x56,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] + +v_lshl_or_b32_e64_dpp v5, v1, v2, exec_hi row_ror:1 +// GFX11: [0x05,0x00,0x56,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] + +v_lshl_or_b32_e64_dpp v5, v1, v2, exec_lo row_ror:15 +// GFX11: [0x05,0x00,0x56,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] + +v_lshl_or_b32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0x56,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] + +v_lshl_or_b32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0x56,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] + +v_lshl_or_b32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0x56,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13] + +v_lshl_or_b32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x00,0x56,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] + +v_lshlrev_b16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_lshlrev_b16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_lshlrev_b16_e64_dpp v5, v1, v2 row_mirror +// GFX11: [0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_lshlrev_b16_e64_dpp v5, v1, v2 row_half_mirror +// GFX11: [0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_lshlrev_b16_e64_dpp v5, v1, v2 row_shl:1 +// GFX11: [0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_lshlrev_b16_e64_dpp v5, v1, v2 row_shl:15 +// GFX11: [0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_lshlrev_b16_e64_dpp v5, v1, v2 row_shr:1 +// GFX11: [0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_lshlrev_b16_e64_dpp v5, v1, v2 row_shr:15 +// GFX11: [0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_lshlrev_b16_e64_dpp v5, v1, v2 row_ror:1 +// GFX11: [0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_lshlrev_b16_e64_dpp v5, v1, v2 row_ror:15 +// GFX11: [0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_lshlrev_b16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_lshlrev_b16_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] + +v_lshlrev_b16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] + +v_lshlrev_b16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x00,0x38,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] + +v_lshrrev_b16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] +// GFX11: 
[0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_lshrrev_b16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_lshrrev_b16_e64_dpp v5, v1, v2 row_mirror +// GFX11: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_lshrrev_b16_e64_dpp v5, v1, v2 row_half_mirror +// GFX11: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_lshrrev_b16_e64_dpp v5, v1, v2 row_shl:1 +// GFX11: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_lshrrev_b16_e64_dpp v5, v1, v2 row_shl:15 +// GFX11: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_lshrrev_b16_e64_dpp v5, v1, v2 row_shr:1 +// GFX11: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_lshrrev_b16_e64_dpp v5, v1, v2 row_shr:15 +// GFX11: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_lshrrev_b16_e64_dpp v5, v1, v2 row_ror:1 +// GFX11: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_lshrrev_b16_e64_dpp v5, v1, v2 row_ror:15 +// GFX11: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_lshrrev_b16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_lshrrev_b16_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] + +v_lshrrev_b16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] + +v_lshrrev_b16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x00,0x39,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] + +v_mad_i16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0x53,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_mad_i16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0x53,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_mad_i16_e64_dpp v5, v1, v2, v3 row_mirror +// GFX11: [0x05,0x00,0x53,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_mad_i16_e64_dpp v5, v1, v2, v3 row_half_mirror +// GFX11: [0x05,0x00,0x53,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] + +v_mad_i16_e64_dpp v5, v1, v2, v255 row_shl:1 +// GFX11: [0x05,0x00,0x53,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x01,0x01,0xff] + +v_mad_i16_e64_dpp v5, v1, v2, s105 row_shl:15 +// GFX11: [0x05,0x00,0x53,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x0f,0x01,0xff] + +v_mad_i16_e64_dpp v5, v1, v2, vcc_hi row_shr:1 +// GFX11: [0x05,0x00,0x53,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff] + +v_mad_i16_e64_dpp v5, v1, v2, vcc_lo row_shr:15 +// GFX11: [0x05,0x00,0x53,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff] + +v_mad_i16_e64_dpp v5, v1, v2, ttmp15 row_ror:1 +// GFX11: [0x05,0x00,0x53,0xd6,0xfa,0x04,0xee,0x01,0x01,0x21,0x01,0xff] + +v_mad_i16_e64_dpp v5, v1, v2, exec_hi row_ror:15 +// GFX11: [0x05,0x00,0x53,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] + +v_mad_i16_e64_dpp v5, v1, v2, exec_lo row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0x53,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] + +v_mad_i16_e64_dpp v5, v1, v2, null row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0x53,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] + +v_mad_i16_e64_dpp v5, v1, v2, -1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0x53,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x09,0x13] + +v_mad_i16_e64_dpp v255, v255, v255, src_scc clamp 
row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x80,0x53,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] + +v_mad_i32_i16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_mad_i32_i16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_mad_i32_i16_e64_dpp v5, v1, v2, v3 row_mirror +// GFX11: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_mad_i32_i16_e64_dpp v5, v1, v2, v255 row_half_mirror +// GFX11: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] + +v_mad_i32_i16_e64_dpp v5, v1, v2, s105 row_shl:1 +// GFX11: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] + +v_mad_i32_i16_e64_dpp v5, v1, v2, vcc_hi row_shl:15 +// GFX11: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] + +v_mad_i32_i16_e64_dpp v5, v1, v2, vcc_lo row_shr:1 +// GFX11: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] + +v_mad_i32_i16_e64_dpp v5, v1, v2, ttmp15 row_shr:15 +// GFX11: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] + +v_mad_i32_i16_e64_dpp v5, v1, v2, exec_hi row_ror:1 +// GFX11: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] + +v_mad_i32_i16_e64_dpp v5, v1, v2, exec_lo row_ror:15 +// GFX11: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] + +v_mad_i32_i16_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] + +v_mad_i32_i16_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] + +v_mad_i32_i16_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13] + +v_mad_i32_i16_e64_dpp v255, v255, v255, src_scc clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x80,0x5a,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] + +v_mad_i32_i24_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0x0a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_mad_i32_i24_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0x0a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_mad_i32_i24_e64_dpp v5, v1, v2, v3 row_mirror +// GFX11: [0x05,0x00,0x0a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_mad_i32_i24_e64_dpp v5, v1, v2, v255 row_half_mirror +// GFX11: [0x05,0x00,0x0a,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] + +v_mad_i32_i24_e64_dpp v5, v1, v2, s105 row_shl:1 +// GFX11: [0x05,0x00,0x0a,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] + +v_mad_i32_i24_e64_dpp v5, v1, v2, vcc_hi row_shl:15 +// GFX11: [0x05,0x00,0x0a,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] + +v_mad_i32_i24_e64_dpp v5, v1, v2, vcc_lo row_shr:1 +// GFX11: [0x05,0x00,0x0a,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] + +v_mad_i32_i24_e64_dpp v5, v1, v2, ttmp15 row_shr:15 +// GFX11: [0x05,0x00,0x0a,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] + +v_mad_i32_i24_e64_dpp v5, v1, v2, exec_hi row_ror:1 +// GFX11: [0x05,0x00,0x0a,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] + +v_mad_i32_i24_e64_dpp v5, v1, v2, exec_lo row_ror:15 +// GFX11: [0x05,0x00,0x0a,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] + +v_mad_i32_i24_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0x0a,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] + +v_mad_i32_i24_e64_dpp v5, 
v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0x0a,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] + +v_mad_i32_i24_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0x0a,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13] + +v_mad_i32_i24_e64_dpp v255, v255, v255, src_scc clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x80,0x0a,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] + +v_mad_u16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0x41,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_mad_u16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0x41,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_mad_u16_e64_dpp v5, v1, v2, v3 row_mirror +// GFX11: [0x05,0x00,0x41,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_mad_u16_e64_dpp v5, v1, v2, v3 row_half_mirror +// GFX11: [0x05,0x00,0x41,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] + +v_mad_u16_e64_dpp v5, v1, v2, v255 row_shl:1 +// GFX11: [0x05,0x00,0x41,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x01,0x01,0xff] + +v_mad_u16_e64_dpp v5, v1, v2, s105 row_shl:15 +// GFX11: [0x05,0x00,0x41,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x0f,0x01,0xff] + +v_mad_u16_e64_dpp v5, v1, v2, vcc_hi row_shr:1 +// GFX11: [0x05,0x00,0x41,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff] + +v_mad_u16_e64_dpp v5, v1, v2, vcc_lo row_shr:15 +// GFX11: [0x05,0x00,0x41,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff] + +v_mad_u16_e64_dpp v5, v1, v2, ttmp15 row_ror:1 +// GFX11: [0x05,0x00,0x41,0xd6,0xfa,0x04,0xee,0x01,0x01,0x21,0x01,0xff] + +v_mad_u16_e64_dpp v5, v1, v2, exec_hi row_ror:15 +// GFX11: [0x05,0x00,0x41,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] + +v_mad_u16_e64_dpp v5, v1, v2, exec_lo row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0x41,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] + +v_mad_u16_e64_dpp v5, v1, v2, null row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0x41,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] + +v_mad_u16_e64_dpp v5, v1, v2, -1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0x41,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x09,0x13] + +v_mad_u16_e64_dpp v255, v255, v255, src_scc clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x80,0x41,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] + +v_mad_u32_u16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0x59,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_mad_u32_u16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0x59,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_mad_u32_u16_e64_dpp v5, v1, v2, v3 row_mirror +// GFX11: [0x05,0x00,0x59,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_mad_u32_u16_e64_dpp v5, v1, v2, v255 row_half_mirror +// GFX11: [0x05,0x00,0x59,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] + +v_mad_u32_u16_e64_dpp v5, v1, v2, s105 row_shl:1 +// GFX11: [0x05,0x00,0x59,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] + +v_mad_u32_u16_e64_dpp v5, v1, v2, vcc_hi row_shl:15 +// GFX11: [0x05,0x00,0x59,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] + +v_mad_u32_u16_e64_dpp v5, v1, v2, vcc_lo row_shr:1 +// GFX11: [0x05,0x00,0x59,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] + +v_mad_u32_u16_e64_dpp v5, v1, v2, ttmp15 row_shr:15 +// GFX11: [0x05,0x00,0x59,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] + +v_mad_u32_u16_e64_dpp v5, v1, v2, exec_hi row_ror:1 +// GFX11: [0x05,0x00,0x59,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] + 
+v_mad_u32_u16_e64_dpp v5, v1, v2, exec_lo row_ror:15 +// GFX11: [0x05,0x00,0x59,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] + +v_mad_u32_u16_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0x59,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] + +v_mad_u32_u16_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0x59,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] + +v_mad_u32_u16_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0x59,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13] + +v_mad_u32_u16_e64_dpp v255, v255, v255, src_scc clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x80,0x59,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] + +v_mad_u32_u24_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0x0b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_mad_u32_u24_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0x0b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_mad_u32_u24_e64_dpp v5, v1, v2, v3 row_mirror +// GFX11: [0x05,0x00,0x0b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_mad_u32_u24_e64_dpp v5, v1, v2, v255 row_half_mirror +// GFX11: [0x05,0x00,0x0b,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] + +v_mad_u32_u24_e64_dpp v5, v1, v2, s105 row_shl:1 +// GFX11: [0x05,0x00,0x0b,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] + +v_mad_u32_u24_e64_dpp v5, v1, v2, vcc_hi row_shl:15 +// GFX11: [0x05,0x00,0x0b,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] + +v_mad_u32_u24_e64_dpp v5, v1, v2, vcc_lo row_shr:1 +// GFX11: [0x05,0x00,0x0b,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] + +v_mad_u32_u24_e64_dpp v5, v1, v2, ttmp15 row_shr:15 +// GFX11: [0x05,0x00,0x0b,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] + +v_mad_u32_u24_e64_dpp v5, v1, v2, exec_hi row_ror:1 +// GFX11: [0x05,0x00,0x0b,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] + +v_mad_u32_u24_e64_dpp v5, v1, v2, exec_lo row_ror:15 +// GFX11: [0x05,0x00,0x0b,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] + +v_mad_u32_u24_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0x0b,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] + +v_mad_u32_u24_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0x0b,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] + +v_mad_u32_u24_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0x0b,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13] + +v_mad_u32_u24_e64_dpp v255, v255, v255, src_scc clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x80,0x0b,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] + +v_max3_f16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0x4c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_max3_f16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0x4c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_max3_f16_e64_dpp v5, v1, v2, v3 row_mirror +// GFX11: [0x05,0x00,0x4c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_max3_f16_e64_dpp v5, v1, v2, v255 row_half_mirror +// GFX11: [0x05,0x00,0x4c,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] + +v_max3_f16_e64_dpp v5, v1, v2, s105 row_shl:1 +// GFX11: [0x05,0x00,0x4c,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] + +v_max3_f16_e64_dpp v5, v1, v2, vcc_hi row_shl:15 +// GFX11: [0x05,0x00,0x4c,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] + +v_max3_f16_e64_dpp v5, v1, v2, vcc_lo row_shr:1 
+// GFX11: [0x05,0x00,0x4c,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff]
+
+v_max3_f16_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15
+// GFX11: [0x05,0x01,0x4c,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff]
+
+v_max3_f16_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1
+// GFX11: [0x05,0x02,0x4c,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff]
+
+v_max3_f16_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15
+// GFX11: [0x05,0x04,0x4c,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff]
+
+v_max3_f16_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x03,0x4c,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff]
+
+v_max3_f16_e64_dpp v5, -|v1|, v2, -|-1| row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: [0x05,0x05,0x4c,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01]
+
+v_max3_f16_e64_dpp v5, v1, -|v2|, -|0.5| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: [0x05,0x06,0x4c,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x09,0x13]
+
+v_max3_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: [0xff,0x87,0x4c,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x05,0x30]
+
+v_max3_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0]
+// GFX11: [0x05,0x00,0x1c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
+
+v_max3_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3]
+// GFX11: [0x05,0x00,0x1c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff]
+
+v_max3_f32_e64_dpp v5, v1, v2, v3 row_mirror
+// GFX11: [0x05,0x00,0x1c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff]
+
+v_max3_f32_e64_dpp v5, v1, v2, v255 row_half_mirror
+// GFX11: [0x05,0x00,0x1c,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff]
+
+v_max3_f32_e64_dpp v5, v1, v2, s105 row_shl:1
+// GFX11: [0x05,0x00,0x1c,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff]
+
+v_max3_f32_e64_dpp v5, v1, v2, vcc_hi row_shl:15
+// GFX11: [0x05,0x00,0x1c,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff]
+
+v_max3_f32_e64_dpp v5, v1, v2, vcc_lo row_shr:1
+// GFX11: [0x05,0x00,0x1c,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff]
+
+v_max3_f32_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15
+// GFX11: [0x05,0x01,0x1c,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff]
+
+v_max3_f32_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1
+// GFX11: [0x05,0x02,0x1c,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff]
+
+v_max3_f32_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15
+// GFX11: [0x05,0x04,0x1c,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff]
+
+v_max3_f32_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x03,0x1c,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff]
+
+v_max3_f32_e64_dpp v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: [0x05,0x05,0x1c,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01]
+
+v_max3_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: [0x05,0x06,0x1c,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x09,0x13]
+
+v_max3_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: [0xff,0x87,0x1c,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x05,0x30]
+
+v_max3_i16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0]
+// GFX11: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
+
+v_max3_i16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3]
+// GFX11: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff]
+
+v_max3_i16_e64_dpp v5, v1, v2, v3 row_mirror
+// GFX11: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff]
+
+v_max3_i16_e64_dpp v5, v1, v2, v3 row_half_mirror
+// GFX11: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff]
+
+v_max3_i16_e64_dpp v5, v1, v2, v255 row_shl:1
+// GFX11: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x01,0x01,0xff]
+
+v_max3_i16_e64_dpp v5, v1, v2, s105 row_shl:15
+// GFX11: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x0f,0x01,0xff]
+
+v_max3_i16_e64_dpp v5, v1, v2, vcc_hi row_shr:1
+// GFX11: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff]
+
+v_max3_i16_e64_dpp v5, v1, v2, vcc_lo row_shr:15
+// GFX11: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff]
+
+v_max3_i16_e64_dpp v5, v1, v2, ttmp15 row_ror:1
+// GFX11: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0xee,0x01,0x01,0x21,0x01,0xff]
+
+v_max3_i16_e64_dpp v5, v1, v2, exec_hi row_ror:15
+// GFX11: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff]
+
+v_max3_i16_e64_dpp v5, v1, v2, exec_lo row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff]
+
+v_max3_i16_e64_dpp v5, v1, v2, null row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01]
+
+v_max3_i16_e64_dpp v5, v1, v2, -1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x09,0x13]
+
+v_max3_i16_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: [0xff,0x00,0x4d,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30]
+
+v_max3_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0]
+// GFX11: [0x05,0x00,0x1d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
+
+v_max3_i32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3]
+// GFX11: [0x05,0x00,0x1d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff]
+
+v_max3_i32_e64_dpp v5, v1, v2, v3 row_mirror
+// GFX11: [0x05,0x00,0x1d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff]
+
+v_max3_i32_e64_dpp v5, v1, v2, v255 row_half_mirror
+// GFX11: [0x05,0x00,0x1d,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff]
+
+v_max3_i32_e64_dpp v5, v1, v2, s105 row_shl:1
+// GFX11: [0x05,0x00,0x1d,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff]
+
+v_max3_i32_e64_dpp v5, v1, v2, vcc_hi row_shl:15
+// GFX11: [0x05,0x00,0x1d,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff]
+
+v_max3_i32_e64_dpp v5, v1, v2, vcc_lo row_shr:1
+// GFX11: [0x05,0x00,0x1d,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff]
+
+v_max3_i32_e64_dpp v5, v1, v2, ttmp15 row_shr:15
+// GFX11: [0x05,0x00,0x1d,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff]
+
+v_max3_i32_e64_dpp v5, v1, v2, exec_hi row_ror:1
+// GFX11: [0x05,0x00,0x1d,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff]
+
+v_max3_i32_e64_dpp v5, v1, v2, exec_lo row_ror:15
+// GFX11: [0x05,0x00,0x1d,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff]
+
+v_max3_i32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x00,0x1d,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff]
+
+v_max3_i32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: [0x05,0x00,0x1d,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01]
+
+v_max3_i32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: [0x05,0x00,0x1d,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13]
+
+v_max3_i32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: [0xff,0x00,0x1d,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30]
+
+v_max3_u16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0]
+// GFX11: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
+
+v_max3_u16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3]
+// GFX11: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff]
+
+v_max3_u16_e64_dpp v5, v1, v2, v3 row_mirror
+// GFX11: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff]
+
+v_max3_u16_e64_dpp v5, v1, v2, v3 row_half_mirror
+// GFX11: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff]
+
+v_max3_u16_e64_dpp v5, v1, v2, v255 row_shl:1
+// GFX11: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x01,0x01,0xff]
+
+v_max3_u16_e64_dpp v5, v1, v2, s105 row_shl:15
+// GFX11: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x0f,0x01,0xff]
+
+v_max3_u16_e64_dpp v5, v1, v2, vcc_hi row_shr:1
+// GFX11: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff]
+
+v_max3_u16_e64_dpp v5, v1, v2, vcc_lo row_shr:15
+// GFX11: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff]
+
+v_max3_u16_e64_dpp v5, v1, v2, ttmp15 row_ror:1
+// GFX11: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0xee,0x01,0x01,0x21,0x01,0xff]
+
+v_max3_u16_e64_dpp v5, v1, v2, exec_hi row_ror:15
+// GFX11: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff]
+
+v_max3_u16_e64_dpp v5, v1, v2, exec_lo row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff]
+
+v_max3_u16_e64_dpp v5, v1, v2, null row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01]
+
+v_max3_u16_e64_dpp v5, v1, v2, -1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x09,0x13]
+
+v_max3_u16_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: [0xff,0x00,0x4e,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30]
+
+v_max3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0]
+// GFX11: [0x05,0x00,0x1e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
+
+v_max3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3]
+// GFX11: [0x05,0x00,0x1e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff]
+
+v_max3_u32_e64_dpp v5, v1, v2, v3 row_mirror
+// GFX11: [0x05,0x00,0x1e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff]
+
+v_max3_u32_e64_dpp v5, v1, v2, v255 row_half_mirror
+// GFX11: [0x05,0x00,0x1e,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff]
+
+v_max3_u32_e64_dpp v5, v1, v2, s105 row_shl:1
+// GFX11: [0x05,0x00,0x1e,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff]
+
+v_max3_u32_e64_dpp v5, v1, v2, vcc_hi row_shl:15
+// GFX11: [0x05,0x00,0x1e,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff]
+
+v_max3_u32_e64_dpp v5, v1, v2, vcc_lo row_shr:1
+// GFX11: [0x05,0x00,0x1e,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff]
+
+v_max3_u32_e64_dpp v5, v1, v2, ttmp15 row_shr:15
+// GFX11: [0x05,0x00,0x1e,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff]
+
+v_max3_u32_e64_dpp v5, v1, v2, exec_hi row_ror:1
+// GFX11: [0x05,0x00,0x1e,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff]
+
+v_max3_u32_e64_dpp v5, v1, v2, exec_lo row_ror:15
+// GFX11: [0x05,0x00,0x1e,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff]
+
+v_max3_u32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x00,0x1e,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff]
+
+v_max3_u32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: [0x05,0x00,0x1e,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01]
+
+v_max3_u32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: [0x05,0x00,0x1e,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13]
+
+v_max3_u32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: [0xff,0x00,0x1e,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30]
+
+v_max_i16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0]
+// GFX11: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+
+v_max_i16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3]
+// GFX11: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+
+v_max_i16_e64_dpp v5, v1, v2 row_mirror
+// GFX11: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+
+v_max_i16_e64_dpp v5, v1, v2 row_half_mirror
+// GFX11: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+
+v_max_i16_e64_dpp v5, v1, v2 row_shl:1
+// GFX11: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+
+v_max_i16_e64_dpp v5, v1, v2 row_shl:15
+// GFX11: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+
+v_max_i16_e64_dpp v5, v1, v2 row_shr:1
+// GFX11: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+
+v_max_i16_e64_dpp v5, v1, v2 row_shr:15
+// GFX11: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+
+v_max_i16_e64_dpp v5, v1, v2 row_ror:1
+// GFX11: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+
+v_max_i16_e64_dpp v5, v1, v2 row_ror:15
+// GFX11: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+
+v_max_i16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+
+v_max_i16_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+
+v_max_i16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
+
+v_max_i16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: [0xff,0x00,0x0a,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30]
+
+v_max_u16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0]
+// GFX11: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+
+v_max_u16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3]
+// GFX11: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+
+v_max_u16_e64_dpp v5, v1, v2 row_mirror
+// GFX11: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+
+v_max_u16_e64_dpp v5, v1, v2 row_half_mirror
+// GFX11: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+
+v_max_u16_e64_dpp v5, v1, v2 row_shl:1
+// GFX11: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+
+v_max_u16_e64_dpp v5, v1, v2 row_shl:15
+// GFX11: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+
+v_max_u16_e64_dpp v5, v1, v2 row_shr:1
+// GFX11: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+
+v_max_u16_e64_dpp v5, v1, v2 row_shr:15
+// GFX11: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+
+v_max_u16_e64_dpp v5, v1, v2 row_ror:1
+// GFX11: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+
+v_max_u16_e64_dpp v5, v1, v2 row_ror:15
+// GFX11: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+
+v_max_u16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+
+v_max_u16_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+
+v_max_u16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
+
+v_max_u16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: [0xff,0x00,0x09,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30]
+
+v_maxmin_f16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0]
+// GFX11: [0x05,0x00,0x60,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
+
+v_maxmin_f16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3]
+// GFX11: [0x05,0x00,0x60,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff]
+
+v_maxmin_f16_e64_dpp v5, v1, v2, v3 row_mirror
+// GFX11: [0x05,0x00,0x60,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff]
+
+v_maxmin_f16_e64_dpp v5, v1, v2, v255 row_half_mirror
+// GFX11: [0x05,0x00,0x60,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff]
+
+v_maxmin_f16_e64_dpp v5, v1, v2, s105 row_shl:1
+// GFX11: [0x05,0x00,0x60,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff]
+
+v_maxmin_f16_e64_dpp v5, v1, v2, vcc_hi row_shl:15
+// GFX11: [0x05,0x00,0x60,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff]
+
+v_maxmin_f16_e64_dpp v5, v1, v2, vcc_lo row_shr:1
+// GFX11: [0x05,0x00,0x60,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff]
+
+v_maxmin_f16_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15
+// GFX11: [0x05,0x01,0x60,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff]
+
+v_maxmin_f16_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1
+// GFX11: [0x05,0x02,0x60,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff]
+
+v_maxmin_f16_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15
+// GFX11: [0x05,0x04,0x60,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff]
+
+v_maxmin_f16_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x03,0x60,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff]
+
+v_maxmin_f16_e64_dpp v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: [0x05,0x05,0x60,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01]
+
+v_maxmin_f16_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: [0x05,0x06,0x60,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x09,0x13]
+
+v_maxmin_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: [0xff,0x87,0x60,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x05,0x30]
+
+v_maxmin_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0]
+// GFX11: [0x05,0x00,0x5e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
+
+v_maxmin_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3]
+// GFX11: [0x05,0x00,0x5e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff]
+
+v_maxmin_f32_e64_dpp v5, v1, v2, v3 row_mirror
+// GFX11: [0x05,0x00,0x5e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff]
+
+v_maxmin_f32_e64_dpp v5, v1, v2, v255 row_half_mirror
+// GFX11: [0x05,0x00,0x5e,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff]
+
+v_maxmin_f32_e64_dpp v5, v1, v2, s105 row_shl:1
+// GFX11: [0x05,0x00,0x5e,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff]
+
+v_maxmin_f32_e64_dpp v5, v1, v2, vcc_hi row_shl:15
+// GFX11: [0x05,0x00,0x5e,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff]
+
+v_maxmin_f32_e64_dpp v5, v1, v2, vcc_lo row_shr:1
+// GFX11: [0x05,0x00,0x5e,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff]
+
+v_maxmin_f32_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15
+// GFX11: [0x05,0x01,0x5e,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff]
+
+v_maxmin_f32_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1
+// GFX11: [0x05,0x02,0x5e,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff]
+
+v_maxmin_f32_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15
+// GFX11: [0x05,0x04,0x5e,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff]
+
+v_maxmin_f32_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x03,0x5e,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff]
+
+v_maxmin_f32_e64_dpp v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: [0x05,0x05,0x5e,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01]
+
+v_maxmin_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: [0x05,0x06,0x5e,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x09,0x13]
+
+v_maxmin_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: [0xff,0x87,0x5e,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x05,0x30]
+
+v_maxmin_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0]
+// GFX11: [0x05,0x00,0x64,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
+
+v_maxmin_i32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3]
+// GFX11: [0x05,0x00,0x64,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff]
+
+v_maxmin_i32_e64_dpp v5, v1, v2, v3 row_mirror
+// GFX11: [0x05,0x00,0x64,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff]
+
+v_maxmin_i32_e64_dpp v5, v1, v2, v255 row_half_mirror
+// GFX11: [0x05,0x00,0x64,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff]
+
+v_maxmin_i32_e64_dpp v5, v1, v2, s105 row_shl:1
+// GFX11: [0x05,0x00,0x64,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff]
+
+v_maxmin_i32_e64_dpp v5, v1, v2, vcc_hi row_shl:15
+// GFX11: [0x05,0x00,0x64,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff]
+
+v_maxmin_i32_e64_dpp v5, v1, v2, vcc_lo row_shr:1
+// GFX11: [0x05,0x00,0x64,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff]
+
+v_maxmin_i32_e64_dpp v5, v1, v2, ttmp15 row_shr:15
+// GFX11: [0x05,0x00,0x64,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff]
+
+v_maxmin_i32_e64_dpp v5, v1, v2, exec_hi row_ror:1
+// GFX11: [0x05,0x00,0x64,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff]
+
+v_maxmin_i32_e64_dpp v5, v1, v2, exec_lo row_ror:15
+// GFX11: [0x05,0x00,0x64,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff]
+
+v_maxmin_i32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x00,0x64,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff]
+
+v_maxmin_i32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: [0x05,0x00,0x64,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01]
+
+v_maxmin_i32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: [0x05,0x00,0x64,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13]
+
+v_maxmin_i32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: [0xff,0x00,0x64,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30]
+
+v_maxmin_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0]
+// GFX11: [0x05,0x00,0x62,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
+
+v_maxmin_u32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3]
+// GFX11: [0x05,0x00,0x62,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff]
+
+v_maxmin_u32_e64_dpp v5, v1, v2, v3 row_mirror
+// GFX11: [0x05,0x00,0x62,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff]
+
+v_maxmin_u32_e64_dpp v5, v1, v2, v255 row_half_mirror
+// GFX11: [0x05,0x00,0x62,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff]
+
+v_maxmin_u32_e64_dpp v5, v1, v2, s105 row_shl:1
+// GFX11: [0x05,0x00,0x62,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff]
+
+v_maxmin_u32_e64_dpp v5, v1, v2, vcc_hi row_shl:15
+// GFX11: [0x05,0x00,0x62,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff]
+
+v_maxmin_u32_e64_dpp v5, v1, v2, vcc_lo row_shr:1
+// GFX11: [0x05,0x00,0x62,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff]
+
+v_maxmin_u32_e64_dpp v5, v1, v2, ttmp15 row_shr:15
+// GFX11: [0x05,0x00,0x62,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff]
+
+v_maxmin_u32_e64_dpp v5, v1, v2, exec_hi row_ror:1
+// GFX11: [0x05,0x00,0x62,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff]
+
+v_maxmin_u32_e64_dpp v5, v1, v2, exec_lo row_ror:15
+// GFX11: [0x05,0x00,0x62,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff]
+
+v_maxmin_u32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x00,0x62,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff]
+
+v_maxmin_u32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: [0x05,0x00,0x62,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01]
+
+v_maxmin_u32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: [0x05,0x00,0x62,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13]
+
+v_maxmin_u32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: [0xff,0x00,0x62,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30]
+
+v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0]
+// GFX11: [0x05,0x00,0x20,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+
+v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3]
+// GFX11: [0x05,0x00,0x20,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+
+v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 row_mirror
+// GFX11: [0x05,0x00,0x20,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+
+v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 row_half_mirror
+// GFX11: [0x05,0x00,0x20,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+
+v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 row_shl:1
+// GFX11: [0x05,0x00,0x20,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+
+v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 row_shl:15
+// GFX11: [0x05,0x00,0x20,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+
+v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 row_shr:1
+// GFX11: [0x05,0x00,0x20,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+
+v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 row_shr:15
+// GFX11: [0x05,0x00,0x20,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+
+v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 row_ror:1
+// GFX11: [0x05,0x00,0x20,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+
+v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 row_ror:15
+// GFX11: [0x05,0x00,0x20,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+
+v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x00,0x20,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+
+v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: [0x05,0x00,0x20,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+
+v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: [0x05,0x00,0x20,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
+
+v_mbcnt_hi_u32_b32_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: [0xff,0x00,0x20,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30]
+
+v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0]
+// GFX11: [0x05,0x00,0x1f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+
+v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3]
+// GFX11: [0x05,0x00,0x1f,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+
+v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 row_mirror
+// GFX11: [0x05,0x00,0x1f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+
+v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 row_half_mirror
+// GFX11: [0x05,0x00,0x1f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+
+v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 row_shl:1
+// GFX11: [0x05,0x00,0x1f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+
+v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 row_shl:15
+// GFX11: [0x05,0x00,0x1f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+
+v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 row_shr:1
+// GFX11: [0x05,0x00,0x1f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+
+v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 row_shr:15
+// GFX11: [0x05,0x00,0x1f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+
+v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 row_ror:1
+// GFX11: [0x05,0x00,0x1f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+
+v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 row_ror:15
+// GFX11: [0x05,0x00,0x1f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+
+v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x00,0x1f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+
+v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: [0x05,0x00,0x1f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+
+v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: [0x05,0x00,0x1f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
+
+v_mbcnt_lo_u32_b32_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: [0xff,0x00,0x1f,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30]
+
+v_med3_f16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0]
+// GFX11: [0x05,0x00,0x4f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
+
+v_med3_f16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3]
+// GFX11: [0x05,0x00,0x4f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff]
+
+v_med3_f16_e64_dpp v5, v1, v2, v3 row_mirror
+// GFX11: [0x05,0x00,0x4f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff]
+
+v_med3_f16_e64_dpp v5, v1, v2, v255 row_half_mirror
+// GFX11: [0x05,0x00,0x4f,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff]
+
+v_med3_f16_e64_dpp v5, v1, v2, s105 row_shl:1
+// GFX11: [0x05,0x00,0x4f,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff]
+
+v_med3_f16_e64_dpp v5, v1, v2, vcc_hi row_shl:15
+// GFX11: [0x05,0x00,0x4f,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff]
+
+v_med3_f16_e64_dpp v5, v1, v2, vcc_lo row_shr:1
+// GFX11: [0x05,0x00,0x4f,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff]
+
+v_med3_f16_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15
+// GFX11: [0x05,0x01,0x4f,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff]
+
+v_med3_f16_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1
+// GFX11: [0x05,0x02,0x4f,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff]
+
+v_med3_f16_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15
+// GFX11: [0x05,0x04,0x4f,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff]
+
+v_med3_f16_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x03,0x4f,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff]
+
+v_med3_f16_e64_dpp v5, -|v1|, v2, -|-1| row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: [0x05,0x05,0x4f,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01]
+
+v_med3_f16_e64_dpp v5, v1, -|v2|, -|0.5| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: [0x05,0x06,0x4f,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x09,0x13]
+
+v_med3_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: [0xff,0x87,0x4f,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x05,0x30]
+
+v_med3_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0]
+// GFX11: [0x05,0x00,0x1f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
+
+v_med3_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3]
+// GFX11: [0x05,0x00,0x1f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff]
+
+v_med3_f32_e64_dpp v5, v1, v2, v3 row_mirror
+// GFX11: [0x05,0x00,0x1f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff]
+
+v_med3_f32_e64_dpp v5, v1, v2, v255 row_half_mirror
+// GFX11: [0x05,0x00,0x1f,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff]
+
+v_med3_f32_e64_dpp v5, v1, v2, s105 row_shl:1
+// GFX11: [0x05,0x00,0x1f,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff]
+
+v_med3_f32_e64_dpp v5, v1, v2, vcc_hi row_shl:15
+// GFX11: [0x05,0x00,0x1f,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff]
+
+v_med3_f32_e64_dpp v5, v1, v2, vcc_lo row_shr:1
+// GFX11: [0x05,0x00,0x1f,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff]
+
+v_med3_f32_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15
+// GFX11: [0x05,0x01,0x1f,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff]
+
+v_med3_f32_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1
+// GFX11: [0x05,0x02,0x1f,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff]
+
+v_med3_f32_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15
+// GFX11: [0x05,0x04,0x1f,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff]
+
+v_med3_f32_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x03,0x1f,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff]
+
+v_med3_f32_e64_dpp v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: [0x05,0x05,0x1f,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01]
+
+v_med3_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: [0x05,0x06,0x1f,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x09,0x13]
+
+v_med3_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: [0xff,0x87,0x1f,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x05,0x30]
+
+v_med3_i16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0]
+// GFX11: [0x05,0x00,0x50,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
+
+v_med3_i16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3]
+// GFX11: [0x05,0x00,0x50,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff]
+
+v_med3_i16_e64_dpp v5, v1, v2, v3 row_mirror
+// GFX11: [0x05,0x00,0x50,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff]
+
+v_med3_i16_e64_dpp v5, v1, v2, v3 row_half_mirror
+// GFX11: [0x05,0x00,0x50,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff]
+
+v_med3_i16_e64_dpp v5, v1, v2, v255 row_shl:1
+// GFX11: [0x05,0x00,0x50,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x01,0x01,0xff]
+
+v_med3_i16_e64_dpp v5, v1, v2, s105 row_shl:15
+// GFX11: [0x05,0x00,0x50,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x0f,0x01,0xff]
+
+v_med3_i16_e64_dpp v5, v1, v2, vcc_hi row_shr:1
+// GFX11: [0x05,0x00,0x50,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff]
+
+v_med3_i16_e64_dpp v5, v1, v2, vcc_lo row_shr:15
+// GFX11: [0x05,0x00,0x50,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff]
+
+v_med3_i16_e64_dpp v5, v1, v2, ttmp15 row_ror:1
+// GFX11: [0x05,0x00,0x50,0xd6,0xfa,0x04,0xee,0x01,0x01,0x21,0x01,0xff]
+
+v_med3_i16_e64_dpp v5, v1, v2, exec_hi row_ror:15
+// GFX11: [0x05,0x00,0x50,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff]
+
+v_med3_i16_e64_dpp v5, v1, v2, exec_lo row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x00,0x50,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff]
+
+v_med3_i16_e64_dpp v5, v1, v2, null row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: [0x05,0x00,0x50,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01]
+
+v_med3_i16_e64_dpp v5, v1, v2, -1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: [0x05,0x00,0x50,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x09,0x13]
+
+v_med3_i16_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: [0xff,0x00,0x50,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30]
+
+v_med3_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0]
+// GFX11: [0x05,0x00,0x20,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
+
+v_med3_i32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3]
+// GFX11: [0x05,0x00,0x20,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff]
+
+v_med3_i32_e64_dpp v5, v1, v2, v3 row_mirror
+// GFX11: [0x05,0x00,0x20,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff]
+
+v_med3_i32_e64_dpp v5, v1, v2, v255 row_half_mirror
+// GFX11: [0x05,0x00,0x20,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff]
+
+v_med3_i32_e64_dpp v5, v1, v2, s105 row_shl:1
+// GFX11: [0x05,0x00,0x20,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff]
+
+v_med3_i32_e64_dpp v5, v1, v2, vcc_hi row_shl:15
+// GFX11: [0x05,0x00,0x20,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff]
+
+v_med3_i32_e64_dpp v5, v1, v2, vcc_lo row_shr:1
+// GFX11: [0x05,0x00,0x20,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff]
+
+v_med3_i32_e64_dpp v5, v1, v2, ttmp15 row_shr:15
+// GFX11: [0x05,0x00,0x20,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff]
+
+v_med3_i32_e64_dpp v5, v1, v2, exec_hi row_ror:1
+// GFX11: [0x05,0x00,0x20,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff]
+
+v_med3_i32_e64_dpp v5, v1, v2, exec_lo row_ror:15
+// GFX11: [0x05,0x00,0x20,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff]
+
+v_med3_i32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x00,0x20,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff]
+
+v_med3_i32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: [0x05,0x00,0x20,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01]
+
+v_med3_i32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: [0x05,0x00,0x20,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13]
+
+v_med3_i32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: [0xff,0x00,0x20,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30]
+
+v_med3_u16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0]
+// GFX11: [0x05,0x00,0x51,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
+
+v_med3_u16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3]
+// GFX11: [0x05,0x00,0x51,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff]
+
+v_med3_u16_e64_dpp v5, v1, v2, v3 row_mirror
+// GFX11: [0x05,0x00,0x51,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff]
+
+v_med3_u16_e64_dpp v5, v1, v2, v3 row_half_mirror
+// GFX11: [0x05,0x00,0x51,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff]
+
+v_med3_u16_e64_dpp v5, v1, v2, v255 row_shl:1
+// GFX11: [0x05,0x00,0x51,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x01,0x01,0xff]
+
+v_med3_u16_e64_dpp v5, v1, v2, s105 row_shl:15
+// GFX11: [0x05,0x00,0x51,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x0f,0x01,0xff]
+
+v_med3_u16_e64_dpp v5, v1, v2, vcc_hi row_shr:1
+// GFX11: [0x05,0x00,0x51,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff]
+
+v_med3_u16_e64_dpp v5, v1, v2, vcc_lo row_shr:15
+// GFX11: [0x05,0x00,0x51,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff]
+
+v_med3_u16_e64_dpp v5, v1, v2, ttmp15 row_ror:1
+// GFX11: [0x05,0x00,0x51,0xd6,0xfa,0x04,0xee,0x01,0x01,0x21,0x01,0xff]
+
+v_med3_u16_e64_dpp v5, v1, v2, exec_hi row_ror:15
+// GFX11: [0x05,0x00,0x51,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff]
+
+v_med3_u16_e64_dpp v5, v1, v2, exec_lo row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x00,0x51,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff]
+
+v_med3_u16_e64_dpp v5, v1, v2, null row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: [0x05,0x00,0x51,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01]
+
+v_med3_u16_e64_dpp v5, v1, v2, -1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: [0x05,0x00,0x51,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x09,0x13]
+
+v_med3_u16_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: [0xff,0x00,0x51,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30]
+
+v_med3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0]
+// GFX11: [0x05,0x00,0x21,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
+
+v_med3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3]
+// GFX11: [0x05,0x00,0x21,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff]
+
+v_med3_u32_e64_dpp v5, v1, v2, v3 row_mirror
+// GFX11: [0x05,0x00,0x21,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff]
+
+v_med3_u32_e64_dpp v5, v1, v2, v255 row_half_mirror
+// GFX11: [0x05,0x00,0x21,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff]
+
+v_med3_u32_e64_dpp v5, v1, v2, s105 row_shl:1
+// GFX11: [0x05,0x00,0x21,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff]
+
+v_med3_u32_e64_dpp v5, v1, v2, vcc_hi row_shl:15
+// GFX11: [0x05,0x00,0x21,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff]
+
+v_med3_u32_e64_dpp v5, v1, v2, vcc_lo row_shr:1
+// GFX11: [0x05,0x00,0x21,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff]
+
+v_med3_u32_e64_dpp v5, v1, v2, ttmp15 row_shr:15
+// GFX11: [0x05,0x00,0x21,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff]
+
+v_med3_u32_e64_dpp v5, v1, v2, exec_hi row_ror:1
+// GFX11: [0x05,0x00,0x21,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff]
+
+v_med3_u32_e64_dpp v5, v1, v2, exec_lo row_ror:15
+// GFX11: [0x05,0x00,0x21,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff]
+
+v_med3_u32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x00,0x21,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff]
+
+v_med3_u32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: [0x05,0x00,0x21,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01]
+
+v_med3_u32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: [0x05,0x00,0x21,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13]
+
+v_med3_u32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: [0xff,0x00,0x21,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30]
+
+v_min3_f16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0]
+// GFX11: [0x05,0x00,0x49,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
+
+v_min3_f16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3]
+// GFX11: [0x05,0x00,0x49,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff]
+
+v_min3_f16_e64_dpp v5, v1, v2, v3 row_mirror
+// GFX11: [0x05,0x00,0x49,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff]
+
+v_min3_f16_e64_dpp v5, v1, v2, v255 row_half_mirror
+// GFX11: [0x05,0x00,0x49,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff]
+
+v_min3_f16_e64_dpp v5, v1, v2, s105 row_shl:1
+// GFX11: [0x05,0x00,0x49,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff]
+
+v_min3_f16_e64_dpp v5, v1, v2, vcc_hi row_shl:15
+// GFX11: [0x05,0x00,0x49,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff]
+
+v_min3_f16_e64_dpp v5, v1, v2, vcc_lo row_shr:1
+// GFX11: [0x05,0x00,0x49,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff]
+
+v_min3_f16_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15
+// GFX11: [0x05,0x01,0x49,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff]
+
+v_min3_f16_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1
+// GFX11: [0x05,0x02,0x49,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff]
+
+v_min3_f16_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15
+// GFX11: [0x05,0x04,0x49,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff]
+
+v_min3_f16_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x03,0x49,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff]
+
+v_min3_f16_e64_dpp v5, -|v1|, v2, -|-1| row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: [0x05,0x05,0x49,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01]
+
+v_min3_f16_e64_dpp v5, v1, -|v2|, -|0.5| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: [0x05,0x06,0x49,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x09,0x13]
+
+v_min3_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: [0xff,0x87,0x49,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x05,0x30]
+
+v_min3_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0]
+// GFX11: [0x05,0x00,0x19,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
+
+v_min3_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3]
+// GFX11: [0x05,0x00,0x19,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff]
+
+v_min3_f32_e64_dpp v5, v1, v2, v3 row_mirror
+// GFX11: [0x05,0x00,0x19,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff]
+
+v_min3_f32_e64_dpp v5, v1, v2, v255 row_half_mirror
+// GFX11: [0x05,0x00,0x19,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff]
+
+v_min3_f32_e64_dpp v5, v1, v2, s105 row_shl:1
+// GFX11: [0x05,0x00,0x19,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff]
+
+v_min3_f32_e64_dpp v5, v1, v2, vcc_hi row_shl:15
+// GFX11: [0x05,0x00,0x19,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff]
+
+v_min3_f32_e64_dpp v5, v1, v2, vcc_lo row_shr:1
+// GFX11: [0x05,0x00,0x19,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff]
+
+v_min3_f32_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15
+// GFX11: [0x05,0x01,0x19,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff]
+
+v_min3_f32_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1
+// GFX11: [0x05,0x02,0x19,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff]
+
+v_min3_f32_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15
+// GFX11: [0x05,0x04,0x19,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff]
+
+v_min3_f32_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x03,0x19,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff]
+
+v_min3_f32_e64_dpp v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: [0x05,0x05,0x19,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01]
+
+v_min3_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: [0x05,0x06,0x19,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x09,0x13]
+
+v_min3_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: [0xff,0x87,0x19,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x05,0x30]
+
+v_min3_i16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0]
+// GFX11: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
+
+v_min3_i16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3]
+// GFX11: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff]
+
+v_min3_i16_e64_dpp v5, v1, v2, v3 row_mirror
+// GFX11: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff]
+
+v_min3_i16_e64_dpp v5, v1, v2, v3 row_half_mirror
+// GFX11: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff]
+
+v_min3_i16_e64_dpp v5, v1, v2, v255 row_shl:1
+// GFX11: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x01,0x01,0xff]
+
+v_min3_i16_e64_dpp v5, v1, v2, s105 row_shl:15
+// GFX11: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x0f,0x01,0xff]
+
+v_min3_i16_e64_dpp v5, v1, v2, vcc_hi row_shr:1
+// GFX11: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff]
+
+v_min3_i16_e64_dpp v5, v1, v2, vcc_lo row_shr:15
+// GFX11: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff]
+
+v_min3_i16_e64_dpp v5, v1, v2, ttmp15 row_ror:1
+// GFX11: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0xee,0x01,0x01,0x21,0x01,0xff]
+
+v_min3_i16_e64_dpp v5, v1, v2, exec_hi row_ror:15
+// GFX11: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff]
+
+v_min3_i16_e64_dpp v5, v1, v2, exec_lo row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff]
+
+v_min3_i16_e64_dpp v5, v1, v2, null row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01]
+
+v_min3_i16_e64_dpp v5, v1, v2, -1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x09,0x13]
+
+v_min3_i16_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: [0xff,0x00,0x4a,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30]
+
+v_min3_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0]
+// GFX11: [0x05,0x00,0x1a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
+
+v_min3_i32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3]
+// GFX11: [0x05,0x00,0x1a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff]
+
+v_min3_i32_e64_dpp v5, v1, v2, v3 row_mirror
+// GFX11: [0x05,0x00,0x1a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff]
+
+v_min3_i32_e64_dpp v5, v1, v2, v255 row_half_mirror
+// GFX11: [0x05,0x00,0x1a,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff]
+
+v_min3_i32_e64_dpp v5, v1, v2, s105 row_shl:1
+// GFX11: [0x05,0x00,0x1a,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff]
+
+v_min3_i32_e64_dpp v5, v1, v2, vcc_hi row_shl:15
+// GFX11: [0x05,0x00,0x1a,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff]
+
+v_min3_i32_e64_dpp v5, v1, v2, vcc_lo row_shr:1
+// GFX11: [0x05,0x00,0x1a,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff]
+
+v_min3_i32_e64_dpp v5, v1, v2, ttmp15 row_shr:15
+// GFX11: [0x05,0x00,0x1a,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff]
+
+v_min3_i32_e64_dpp v5, v1, v2, exec_hi row_ror:1
+// GFX11: [0x05,0x00,0x1a,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff]
+
+v_min3_i32_e64_dpp v5, v1, v2, exec_lo row_ror:15
+// GFX11: [0x05,0x00,0x1a,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff]
+
+v_min3_i32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x00,0x1a,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff]
+
+v_min3_i32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: [0x05,0x00,0x1a,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01]
+
+v_min3_i32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: [0x05,0x00,0x1a,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13]
+
+v_min3_i32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: [0xff,0x00,0x1a,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30]
+
+v_min3_u16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0]
+// GFX11: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
+
+v_min3_u16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3]
+// GFX11: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff]
+
+v_min3_u16_e64_dpp v5, v1, v2, v3 row_mirror
+// GFX11: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff]
+
+v_min3_u16_e64_dpp v5, v1, v2, v3 row_half_mirror
+// GFX11: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff]
+
+v_min3_u16_e64_dpp v5, v1, v2, v255 row_shl:1
+// GFX11: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x01,0x01,0xff]
+
+v_min3_u16_e64_dpp v5, v1, v2, s105 row_shl:15
+// GFX11: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x0f,0x01,0xff]
+
+v_min3_u16_e64_dpp v5, v1, v2, vcc_hi row_shr:1
+// GFX11: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff]
+
+v_min3_u16_e64_dpp v5, v1, v2, vcc_lo row_shr:15
+// GFX11: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff]
+
+v_min3_u16_e64_dpp v5, v1, v2, ttmp15 row_ror:1
+// GFX11: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0xee,0x01,0x01,0x21,0x01,0xff]
+
+v_min3_u16_e64_dpp v5, v1, v2, exec_hi row_ror:15
+// GFX11: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff]
+
+v_min3_u16_e64_dpp v5, v1, v2, exec_lo row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff]
+
+v_min3_u16_e64_dpp v5, v1, v2, null row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01]
+
+v_min3_u16_e64_dpp v5, v1, v2, -1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x09,0x13]
+
+v_min3_u16_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: [0xff,0x00,0x4b,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30]
+
+v_min3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0]
+// GFX11: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
+
+v_min3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3]
+// GFX11: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff]
+
+v_min3_u32_e64_dpp v5, v1, v2, v3 row_mirror
+// GFX11: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff]
+
+v_min3_u32_e64_dpp v5, v1, v2, v255 row_half_mirror
+// GFX11: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff]
+
+v_min3_u32_e64_dpp v5, v1, v2, s105 row_shl:1
+// GFX11: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff]
+
+v_min3_u32_e64_dpp v5, v1, v2, vcc_hi row_shl:15
+// GFX11: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff]
+
+v_min3_u32_e64_dpp v5, v1, v2, vcc_lo row_shr:1
+// GFX11: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff]
+
+v_min3_u32_e64_dpp v5, v1, v2, ttmp15 row_shr:15
+// GFX11: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff]
+
+v_min3_u32_e64_dpp v5, v1, v2, exec_hi row_ror:1
+// GFX11: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff]
+
+v_min3_u32_e64_dpp v5, v1, v2, exec_lo row_ror:15
+// GFX11: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff]
+
+v_min3_u32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff]
+
+v_min3_u32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01]
+
+v_min3_u32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13]
+
+v_min3_u32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: [0xff,0x00,0x1b,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30]
+
+v_min_i16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0]
+// GFX11: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+
+v_min_i16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3]
+// GFX11: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+
+v_min_i16_e64_dpp v5, v1, v2 row_mirror
+// GFX11: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+
+v_min_i16_e64_dpp v5, v1, v2 row_half_mirror
+// GFX11: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+
+v_min_i16_e64_dpp v5, v1, v2 row_shl:1
+// GFX11: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+
+v_min_i16_e64_dpp v5, v1, v2 row_shl:15
+// GFX11: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+
+v_min_i16_e64_dpp v5, v1, v2 row_shr:1
+// GFX11: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+
+v_min_i16_e64_dpp v5, v1, v2 row_shr:15
+// GFX11: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+
+v_min_i16_e64_dpp v5, v1, v2 row_ror:1
+// GFX11: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+
+v_min_i16_e64_dpp v5, v1, v2 row_ror:15
+// GFX11: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+
+v_min_i16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+
+v_min_i16_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+
+v_min_i16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
+
+v_min_i16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: [0xff,0x00,0x0c,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30]
+
+v_min_u16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0]
+// GFX11: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+
+v_min_u16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3]
+// GFX11: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+
+v_min_u16_e64_dpp v5, v1, v2 row_mirror
+// GFX11: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+
+v_min_u16_e64_dpp v5, v1, v2 row_half_mirror
+// GFX11: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+
+v_min_u16_e64_dpp v5, v1, v2 row_shl:1
+// GFX11: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+
+v_min_u16_e64_dpp v5, v1, v2 row_shl:15
+// GFX11: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+
+v_min_u16_e64_dpp v5, v1, v2 row_shr:1
+// GFX11: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+
+v_min_u16_e64_dpp v5, v1, v2 row_shr:15
+// GFX11: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+
+v_min_u16_e64_dpp v5, v1, v2 row_ror:1
+// GFX11: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+
+v_min_u16_e64_dpp v5, v1, v2 row_ror:15
+// GFX11: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+
+v_min_u16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+
+v_min_u16_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+
+v_min_u16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
+
+v_min_u16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: [0xff,0x00,0x0b,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30]
+
+v_minmax_f16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0]
+// GFX11: [0x05,0x00,0x61,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
+
+v_minmax_f16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3]
+// GFX11: [0x05,0x00,0x61,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff]
+
+v_minmax_f16_e64_dpp v5, v1, v2, v3 row_mirror
+// GFX11: [0x05,0x00,0x61,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff]
+
+v_minmax_f16_e64_dpp v5, v1, v2, v255 row_half_mirror
+// GFX11: [0x05,0x00,0x61,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff]
+
+v_minmax_f16_e64_dpp v5, v1, v2, s105 row_shl:1
+// GFX11: [0x05,0x00,0x61,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff]
+
+v_minmax_f16_e64_dpp v5, v1, v2, vcc_hi row_shl:15
+// GFX11: [0x05,0x00,0x61,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff]
+
+v_minmax_f16_e64_dpp v5, v1, v2, vcc_lo row_shr:1
+// GFX11: [0x05,0x00,0x61,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff]
+
+v_minmax_f16_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15
+// GFX11: [0x05,0x01,0x61,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff]
+
+v_minmax_f16_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1
+// GFX11: [0x05,0x02,0x61,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff]
+
+v_minmax_f16_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15
+// GFX11: [0x05,0x04,0x61,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff]
+
+v_minmax_f16_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x03,0x61,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff]
+
+v_minmax_f16_e64_dpp v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: [0x05,0x05,0x61,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01]
+
+v_minmax_f16_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: [0x05,0x06,0x61,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x09,0x13]
+
+v_minmax_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: [0xff,0x87,0x61,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x05,0x30]
+
+v_minmax_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0]
+// GFX11: [0x05,0x00,0x5f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
+
+v_minmax_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3]
+// GFX11: [0x05,0x00,0x5f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff]
+
+v_minmax_f32_e64_dpp v5, v1, v2, v3 row_mirror
+// GFX11: [0x05,0x00,0x5f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff]
+
+v_minmax_f32_e64_dpp v5, v1, v2, v255 row_half_mirror
+// GFX11: [0x05,0x00,0x5f,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff]
+
+v_minmax_f32_e64_dpp v5, v1, v2, s105 row_shl:1
+// GFX11: [0x05,0x00,0x5f,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff]
+
+v_minmax_f32_e64_dpp v5, v1, v2, vcc_hi row_shl:15
+// GFX11: [0x05,0x00,0x5f,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff]
+
+v_minmax_f32_e64_dpp v5, v1, v2, vcc_lo row_shr:1
+// GFX11: [0x05,0x00,0x5f,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff]
+
+v_minmax_f32_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15
+// GFX11: [0x05,0x01,0x5f,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff]
+
+v_minmax_f32_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1
+// GFX11: [0x05,0x02,0x5f,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff]
+
+v_minmax_f32_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15
+// GFX11: [0x05,0x04,0x5f,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff]
+
+v_minmax_f32_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x03,0x5f,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff]
+
+v_minmax_f32_e64_dpp v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: [0x05,0x05,0x5f,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01]
+
+v_minmax_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: [0x05,0x06,0x5f,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x09,0x13]
+
+v_minmax_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: [0xff,0x87,0x5f,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x05,0x30]
+
+v_minmax_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0]
+// GFX11: [0x05,0x00,0x65,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
+
+v_minmax_i32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3]
+// GFX11: [0x05,0x00,0x65,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff]
+
+v_minmax_i32_e64_dpp v5, v1, v2, v3 row_mirror
+// GFX11: [0x05,0x00,0x65,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff]
+
+v_minmax_i32_e64_dpp v5, v1, v2, v255 row_half_mirror
+// GFX11: [0x05,0x00,0x65,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff]
+
+v_minmax_i32_e64_dpp v5, v1, v2, s105 row_shl:1
+// GFX11: [0x05,0x00,0x65,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff]
+
+v_minmax_i32_e64_dpp v5, v1, v2, vcc_hi row_shl:15
+// GFX11: [0x05,0x00,0x65,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff]
+
+v_minmax_i32_e64_dpp v5, v1, v2, vcc_lo row_shr:1
+// GFX11: [0x05,0x00,0x65,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff]
+
+v_minmax_i32_e64_dpp v5, v1, v2, ttmp15 row_shr:15
+// GFX11: [0x05,0x00,0x65,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff]
+
+v_minmax_i32_e64_dpp v5, v1, v2, exec_hi row_ror:1
+// GFX11: [0x05,0x00,0x65,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff]
+
+v_minmax_i32_e64_dpp v5, v1, v2, exec_lo row_ror:15
+// GFX11: [0x05,0x00,0x65,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff]
+
+v_minmax_i32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x00,0x65,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff]
+
+v_minmax_i32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: [0x05,0x00,0x65,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01]
+
+v_minmax_i32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: [0x05,0x00,0x65,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13]
+
+v_minmax_i32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: [0xff,0x00,0x65,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30]
+
+v_minmax_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0]
+// GFX11: [0x05,0x00,0x63,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
+
+v_minmax_u32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3]
+// GFX11: [0x05,0x00,0x63,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff]
+
+v_minmax_u32_e64_dpp v5, v1, v2, v3 row_mirror
+// GFX11: [0x05,0x00,0x63,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff]
+
+v_minmax_u32_e64_dpp v5, v1, v2, v255 row_half_mirror
+// GFX11: [0x05,0x00,0x63,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff]
+
+v_minmax_u32_e64_dpp v5, v1, v2, s105 row_shl:1
+// GFX11: [0x05,0x00,0x63,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff]
+
+v_minmax_u32_e64_dpp v5, v1, v2, vcc_hi row_shl:15
+// GFX11: [0x05,0x00,0x63,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff]
+
+v_minmax_u32_e64_dpp v5, v1, v2, vcc_lo row_shr:1
+// GFX11: [0x05,0x00,0x63,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff]
+
+v_minmax_u32_e64_dpp v5, v1, v2, ttmp15 row_shr:15
+// GFX11: [0x05,0x00,0x63,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff]
+
+v_minmax_u32_e64_dpp v5, v1, v2, exec_hi row_ror:1
+// GFX11: [0x05,0x00,0x63,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff]
+
+v_minmax_u32_e64_dpp v5, v1, v2, exec_lo row_ror:15
+// GFX11: [0x05,0x00,0x63,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff]
+
+v_minmax_u32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x00,0x63,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff]
+
+v_minmax_u32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: [0x05,0x00,0x63,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01]
+
+v_minmax_u32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: [0x05,0x00,0x63,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13]
+
+v_minmax_u32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x00,0x63,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] + +v_msad_u8_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0x39,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_msad_u8_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0x39,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_msad_u8_e64_dpp v5, v1, v2, v3 row_mirror +// GFX11: [0x05,0x00,0x39,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_msad_u8_e64_dpp v5, v1, v2, v255 row_half_mirror +// GFX11: [0x05,0x00,0x39,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] + +v_msad_u8_e64_dpp v5, v1, v2, s105 row_shl:1 +// GFX11: [0x05,0x00,0x39,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] + +v_msad_u8_e64_dpp v5, v1, v2, vcc_hi row_shl:15 +// GFX11: [0x05,0x00,0x39,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] + +v_msad_u8_e64_dpp v5, v1, v2, vcc_lo row_shr:1 +// GFX11: [0x05,0x00,0x39,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] + +v_msad_u8_e64_dpp v5, v1, v2, ttmp15 row_shr:15 +// GFX11: [0x05,0x00,0x39,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] + +v_msad_u8_e64_dpp v5, v1, v2, exec_hi row_ror:1 +// GFX11: [0x05,0x00,0x39,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] + +v_msad_u8_e64_dpp v5, v1, v2, exec_lo row_ror:15 +// GFX11: [0x05,0x00,0x39,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] + +v_msad_u8_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0x39,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] + +v_msad_u8_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0x39,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] + +v_msad_u8_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0x39,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13] + +v_msad_u8_e64_dpp v255, v255, v255, src_scc clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x80,0x39,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] + +v_mul_lo_u16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_mul_lo_u16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_mul_lo_u16_e64_dpp v5, v1, v2 row_mirror +// GFX11: [0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_mul_lo_u16_e64_dpp v5, v1, v2 row_half_mirror +// GFX11: [0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_mul_lo_u16_e64_dpp v5, v1, v2 row_shl:1 +// GFX11: [0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_mul_lo_u16_e64_dpp v5, v1, v2 row_shl:15 +// GFX11: [0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_mul_lo_u16_e64_dpp v5, v1, v2 row_shr:1 +// GFX11: [0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_mul_lo_u16_e64_dpp v5, v1, v2 row_shr:15 +// GFX11: [0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_mul_lo_u16_e64_dpp v5, v1, v2 row_ror:1 +// GFX11: [0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_mul_lo_u16_e64_dpp v5, v1, v2 row_ror:15 +// GFX11: [0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_mul_lo_u16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_mul_lo_u16_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: 
+
+v_mul_lo_u16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: [0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
+
+v_mul_lo_u16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: [0xff,0x00,0x05,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30]
+
+v_mullit_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0]
+// GFX11: [0x05,0x00,0x18,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
+
+v_mullit_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3]
+// GFX11: [0x05,0x00,0x18,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff]
+
+v_mullit_f32_e64_dpp v5, v1, v2, v3 row_mirror
+// GFX11: [0x05,0x00,0x18,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff]
+
+v_mullit_f32_e64_dpp v5, v1, v2, v255 row_half_mirror
+// GFX11: [0x05,0x00,0x18,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff]
+
+v_mullit_f32_e64_dpp v5, v1, v2, s105 row_shl:1
+// GFX11: [0x05,0x00,0x18,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff]
+
+v_mullit_f32_e64_dpp v5, v1, v2, vcc_hi row_shl:15
+// GFX11: [0x05,0x00,0x18,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff]
+
+v_mullit_f32_e64_dpp v5, v1, v2, vcc_lo row_shr:1
+// GFX11: [0x05,0x00,0x18,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff]
+
+v_mullit_f32_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15
+// GFX11: [0x05,0x01,0x18,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff]
+
+v_mullit_f32_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1
+// GFX11: [0x05,0x02,0x18,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff]
+
+v_mullit_f32_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15
+// GFX11: [0x05,0x04,0x18,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff]
+
+v_mullit_f32_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x03,0x18,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff]
+
+v_mullit_f32_e64_dpp v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: [0x05,0x05,0x18,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01]
+
+v_mullit_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: [0x05,0x06,0x18,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x09,0x13]
+
+v_mullit_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: [0xff,0x87,0x18,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x05,0x30]
+
+v_or3_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0]
+// GFX11: [0x05,0x00,0x58,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
+
+v_or3_b32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3]
+// GFX11: [0x05,0x00,0x58,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff]
+
+v_or3_b32_e64_dpp v5, v1, v2, v3 row_mirror
+// GFX11: [0x05,0x00,0x58,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff]
+
+v_or3_b32_e64_dpp v5, v1, v2, v255 row_half_mirror
+// GFX11: [0x05,0x00,0x58,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff]
+
+v_or3_b32_e64_dpp v5, v1, v2, s105 row_shl:1
+// GFX11: [0x05,0x00,0x58,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff]
+
+v_or3_b32_e64_dpp v5, v1, v2, vcc_hi row_shl:15
+// GFX11: [0x05,0x00,0x58,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff]
+
+v_or3_b32_e64_dpp v5, v1, v2, vcc_lo row_shr:1
+// GFX11: [0x05,0x00,0x58,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff]
+
+v_or3_b32_e64_dpp v5, v1, v2, ttmp15 row_shr:15
+// GFX11: [0x05,0x00,0x58,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff]
+
+v_or3_b32_e64_dpp v5, v1, v2, exec_hi row_ror:1
+// GFX11: [0x05,0x00,0x58,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff]
+
+v_or3_b32_e64_dpp v5, v1, v2, exec_lo row_ror:15
+// GFX11: [0x05,0x00,0x58,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff]
+
+v_or3_b32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x00,0x58,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff]
+
+v_or3_b32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: [0x05,0x00,0x58,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01]
+
+v_or3_b32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: [0x05,0x00,0x58,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13]
+
+v_or3_b32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: [0xff,0x00,0x58,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30]
+
+v_or_b16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0]
+// GFX11: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+
+v_or_b16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3]
+// GFX11: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+
+v_or_b16_e64_dpp v5, v1, v2 row_mirror
+// GFX11: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+
+v_or_b16_e64_dpp v5, v1, v2 row_half_mirror
+// GFX11: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+
+v_or_b16_e64_dpp v5, v1, v2 row_shl:1
+// GFX11: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+
+v_or_b16_e64_dpp v5, v1, v2 row_shl:15
+// GFX11: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+
+v_or_b16_e64_dpp v5, v1, v2 row_shr:1
+// GFX11: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+
+v_or_b16_e64_dpp v5, v1, v2 row_shr:15
+// GFX11: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+
+v_or_b16_e64_dpp v5, v1, v2 row_ror:1
+// GFX11: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+
+v_or_b16_e64_dpp v5, v1, v2 row_ror:15
+// GFX11: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+
+v_or_b16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+
+v_or_b16_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+
+v_or_b16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
+
+v_or_b16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: [0xff,0x00,0x63,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30]
+
+v_pack_b32_f16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0]
+// GFX11: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+
+v_pack_b32_f16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3]
+// GFX11: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+
+v_pack_b32_f16_e64_dpp v5, v1, v2 row_mirror
+// GFX11: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+
+v_pack_b32_f16_e64_dpp v5, v1, v2 row_half_mirror
+// GFX11: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+
+v_pack_b32_f16_e64_dpp v5, v1, v2 row_shl:1
+// GFX11: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+
+v_pack_b32_f16_e64_dpp v5, v1, v2 row_shl:15
+// GFX11: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+
+v_pack_b32_f16_e64_dpp v5, v1, v2 row_shr:1
+// GFX11: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+
+v_pack_b32_f16_e64_dpp v5, v1, v2 row_shr:15
+// GFX11: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+
+v_pack_b32_f16_e64_dpp v5, v1, v2 row_ror:1
+// GFX11: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+
+v_pack_b32_f16_e64_dpp v5, v1, v2 row_ror:15
+// GFX11: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+
+v_pack_b32_f16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+
+v_pack_b32_f16_e64_dpp v5, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: [0x05,0x01,0x11,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+
+v_pack_b32_f16_e64_dpp v5, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: [0x05,0x02,0x11,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+
+v_pack_b32_f16_e64_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: [0xff,0x03,0x11,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
+
+v_perm_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0]
+// GFX11: [0x05,0x00,0x44,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
+
+v_perm_b32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3]
+// GFX11: [0x05,0x00,0x44,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff]
+
+v_perm_b32_e64_dpp v5, v1, v2, v3 row_mirror
+// GFX11: [0x05,0x00,0x44,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff]
+
+v_perm_b32_e64_dpp v5, v1, v2, v255 row_half_mirror
+// GFX11: [0x05,0x00,0x44,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff]
+
+v_perm_b32_e64_dpp v5, v1, v2, s105 row_shl:1
+// GFX11: [0x05,0x00,0x44,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff]
+
+v_perm_b32_e64_dpp v5, v1, v2, vcc_hi row_shl:15
+// GFX11: [0x05,0x00,0x44,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff]
+
+v_perm_b32_e64_dpp v5, v1, v2, vcc_lo row_shr:1
+// GFX11: [0x05,0x00,0x44,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff]
+
+v_perm_b32_e64_dpp v5, v1, v2, ttmp15 row_shr:15
+// GFX11: [0x05,0x00,0x44,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff]
+
+v_perm_b32_e64_dpp v5, v1, v2, exec_hi row_ror:1
+// GFX11: [0x05,0x00,0x44,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff]
+
+v_perm_b32_e64_dpp v5, v1, v2, exec_lo row_ror:15
+// GFX11: [0x05,0x00,0x44,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff]
+
+v_perm_b32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x00,0x44,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff]
+
+v_perm_b32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: [0x05,0x00,0x44,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01]
+
+v_perm_b32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: [0x05,0x00,0x44,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13]
+
+v_perm_b32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: [0xff,0x00,0x44,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30]
+
+v_sad_hi_u8_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0]
+// GFX11: [0x05,0x00,0x23,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
+
+v_sad_hi_u8_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3]
+// GFX11: [0x05,0x00,0x23,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff]
+
+v_sad_hi_u8_e64_dpp v5, v1, v2, v3 row_mirror
+// GFX11: [0x05,0x00,0x23,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff]
+
+v_sad_hi_u8_e64_dpp v5, v1, v2, v255 row_half_mirror
+// GFX11: [0x05,0x00,0x23,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff]
+
+v_sad_hi_u8_e64_dpp v5, v1, v2, s105 row_shl:1
+// GFX11: [0x05,0x00,0x23,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff]
+
+v_sad_hi_u8_e64_dpp v5, v1, v2, vcc_hi row_shl:15
+// GFX11: [0x05,0x00,0x23,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff]
+
+v_sad_hi_u8_e64_dpp v5, v1, v2, vcc_lo row_shr:1
+// GFX11: [0x05,0x00,0x23,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff]
+
+v_sad_hi_u8_e64_dpp v5, v1, v2, ttmp15 row_shr:15
+// GFX11: [0x05,0x00,0x23,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff]
+
+v_sad_hi_u8_e64_dpp v5, v1, v2, exec_hi row_ror:1
+// GFX11: [0x05,0x00,0x23,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff]
+
+v_sad_hi_u8_e64_dpp v5, v1, v2, exec_lo row_ror:15
+// GFX11: [0x05,0x00,0x23,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff]
+
+v_sad_hi_u8_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x00,0x23,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff]
+
+v_sad_hi_u8_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: [0x05,0x00,0x23,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01]
+
+v_sad_hi_u8_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: [0x05,0x00,0x23,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13]
+
+v_sad_hi_u8_e64_dpp v255, v255, v255, src_scc clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: [0xff,0x80,0x23,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30]
+
+v_sad_u16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0]
+// GFX11: [0x05,0x00,0x24,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
+
+v_sad_u16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3]
+// GFX11: [0x05,0x00,0x24,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff]
+
+v_sad_u16_e64_dpp v5, v1, v2, v3 row_mirror
+// GFX11: [0x05,0x00,0x24,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff]
+
+v_sad_u16_e64_dpp v5, v1, v2, v255 row_half_mirror
+// GFX11: [0x05,0x00,0x24,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff]
+
+v_sad_u16_e64_dpp v5, v1, v2, s105 row_shl:1
+// GFX11: [0x05,0x00,0x24,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff]
+
+v_sad_u16_e64_dpp v5, v1, v2, vcc_hi row_shl:15
+// GFX11: [0x05,0x00,0x24,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff]
+
+v_sad_u16_e64_dpp v5, v1, v2, vcc_lo row_shr:1
+// GFX11: [0x05,0x00,0x24,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff]
+
+v_sad_u16_e64_dpp v5, v1, v2, ttmp15 row_shr:15
+// GFX11: [0x05,0x00,0x24,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff]
+
+v_sad_u16_e64_dpp v5, v1, v2, exec_hi row_ror:1
+// GFX11: [0x05,0x00,0x24,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff]
+
+v_sad_u16_e64_dpp v5, v1, v2, exec_lo row_ror:15
+// GFX11: [0x05,0x00,0x24,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff]
+
+v_sad_u16_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x00,0x24,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff]
+
+v_sad_u16_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: [0x05,0x00,0x24,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01]
+
+v_sad_u16_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: [0x05,0x00,0x24,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13]
+
+v_sad_u16_e64_dpp v255, v255, v255, src_scc clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: [0xff,0x80,0x24,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30]
+
+v_sad_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0]
+// GFX11: [0x05,0x00,0x25,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
+
+v_sad_u32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3]
+// GFX11: [0x05,0x00,0x25,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff]
+
+v_sad_u32_e64_dpp v5, v1, v2, v3 row_mirror
+// GFX11: [0x05,0x00,0x25,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff]
+
+v_sad_u32_e64_dpp v5, v1, v2, v255 row_half_mirror
+// GFX11: [0x05,0x00,0x25,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff]
+
+v_sad_u32_e64_dpp v5, v1, v2, s105 row_shl:1
+// GFX11: [0x05,0x00,0x25,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff]
+
+v_sad_u32_e64_dpp v5, v1, v2, vcc_hi row_shl:15
+// GFX11: [0x05,0x00,0x25,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff]
+
+v_sad_u32_e64_dpp v5, v1, v2, vcc_lo row_shr:1
+// GFX11: [0x05,0x00,0x25,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff]
+
+v_sad_u32_e64_dpp v5, v1, v2, ttmp15 row_shr:15
+// GFX11: [0x05,0x00,0x25,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff]
+
+v_sad_u32_e64_dpp v5, v1, v2, exec_hi row_ror:1
+// GFX11: [0x05,0x00,0x25,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff]
+
+v_sad_u32_e64_dpp v5, v1, v2, exec_lo row_ror:15
+// GFX11: [0x05,0x00,0x25,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff]
+
+v_sad_u32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x00,0x25,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff]
+
+v_sad_u32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: [0x05,0x00,0x25,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01]
+
+v_sad_u32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: [0x05,0x00,0x25,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13]
+
+v_sad_u32_e64_dpp v255, v255, v255, src_scc clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: [0xff,0x80,0x25,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30]
+
+v_sad_u8_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0]
+// GFX11: [0x05,0x00,0x22,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
+
+v_sad_u8_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3]
+// GFX11: [0x05,0x00,0x22,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff]
+
+v_sad_u8_e64_dpp v5, v1, v2, v3 row_mirror
+// GFX11: [0x05,0x00,0x22,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff]
+
+v_sad_u8_e64_dpp v5, v1, v2, v255 row_half_mirror
+// GFX11: [0x05,0x00,0x22,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff]
+
+v_sad_u8_e64_dpp v5, v1, v2, s105 row_shl:1
+// GFX11: [0x05,0x00,0x22,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff]
+
+v_sad_u8_e64_dpp v5, v1, v2, vcc_hi row_shl:15
+// GFX11: [0x05,0x00,0x22,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff]
+
+v_sad_u8_e64_dpp v5, v1, v2, vcc_lo row_shr:1
+// GFX11: [0x05,0x00,0x22,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff]
+
+v_sad_u8_e64_dpp v5, v1, v2, ttmp15 row_shr:15
+// GFX11: [0x05,0x00,0x22,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff]
+
+v_sad_u8_e64_dpp v5, v1, v2, exec_hi row_ror:1
+// GFX11: [0x05,0x00,0x22,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff]
+
+v_sad_u8_e64_dpp v5, v1, v2, exec_lo row_ror:15
+// GFX11: [0x05,0x00,0x22,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff]
+
+v_sad_u8_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x00,0x22,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff]
+
+v_sad_u8_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: [0x05,0x00,0x22,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01]
+
+v_sad_u8_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: [0x05,0x00,0x22,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13]
+
+v_sad_u8_e64_dpp v255, v255, v255, src_scc clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: [0xff,0x80,0x22,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30]
+
+v_sub_co_u32_e64_dpp v5, s6, v1, v2 quad_perm:[3,2,1,0]
+// W32: [0x05,0x06,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_sub_co_u32_e64_dpp v5, s6, v1, v2 quad_perm:[0,1,2,3]
+// W32: [0x05,0x06,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_sub_co_u32_e64_dpp v5, s6, v1, v2 row_mirror
+// W32: [0x05,0x06,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_sub_co_u32_e64_dpp v5, s6, v1, v2 row_half_mirror
+// W32: [0x05,0x06,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_sub_co_u32_e64_dpp v5, s6, v1, v2 row_shl:1
+// W32: [0x05,0x06,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_sub_co_u32_e64_dpp v5, s6, v1, v2 row_shl:15
+// W32: [0x05,0x06,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_sub_co_u32_e64_dpp v5, s6, v1, v2 row_shr:1
+// W32: [0x05,0x06,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_sub_co_u32_e64_dpp v5, s6, v1, v2 row_shr:15
+// W32: [0x05,0x06,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_sub_co_u32_e64_dpp v5, s6, v1, v2 row_ror:1
+// W32: [0x05,0x06,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_sub_co_u32_e64_dpp v5, s105, v1, v2 row_ror:15
+// W32: [0x05,0x69,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_sub_co_u32_e64_dpp v5, vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W32: [0x05,0x6a,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_sub_co_u32_e64_dpp v5, vcc_hi, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: [0x05,0x6b,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_sub_co_u32_e64_dpp v5, ttmp15, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: [0x05,0x7b,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_sub_co_u32_e64_dpp v5, s[12:13], v1, v2 quad_perm:[3,2,1,0]
+// W64: [0x05,0x0c,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_sub_co_u32_e64_dpp v5, s[12:13], v1, v2 quad_perm:[0,1,2,3]
+// W64: [0x05,0x0c,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_sub_co_u32_e64_dpp v5, s[12:13], v1, v2 row_mirror
+// W64: [0x05,0x0c,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_sub_co_u32_e64_dpp v5, s[12:13], v1, v2 row_half_mirror
+// W64: [0x05,0x0c,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_sub_co_u32_e64_dpp v5, s[12:13], v1, v2 row_shl:1
+// W64: [0x05,0x0c,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_sub_co_u32_e64_dpp v5, s[12:13], v1, v2 row_shl:15
+// W64: [0x05,0x0c,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_sub_co_u32_e64_dpp v5, s[12:13], v1, v2 row_shr:1
+// W64: [0x05,0x0c,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_sub_co_u32_e64_dpp v5, s[12:13], v1, v2 row_shr:15
+// W64: [0x05,0x0c,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_sub_co_u32_e64_dpp v5, s[12:13], v1, v2 row_ror:1
+// W64: [0x05,0x0c,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_sub_co_u32_e64_dpp v5, s[12:13], v1, v2 row_ror:15
+// W64: [0x05,0x0c,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_sub_co_u32_e64_dpp v5, s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W64: [0x05,0x68,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_sub_co_u32_e64_dpp v5, vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: [0x05,0x6a,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_sub_co_u32_e64_dpp v5, ttmp[14:15], v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: [0x05,0x7a,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_sub_co_u32_e64_dpp v255, null, v255, v255 clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: [0xff,0xfc,0x01,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30]
+
+v_sub_nc_i16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0]
+// GFX11: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+
+v_sub_nc_i16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3]
+// GFX11: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+
+v_sub_nc_i16_e64_dpp v5, v1, v2 row_mirror
+// GFX11: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+
+v_sub_nc_i16_e64_dpp v5, v1, v2 row_half_mirror
+// GFX11: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+
+v_sub_nc_i16_e64_dpp v5, v1, v2 row_shl:1
+// GFX11: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+
+v_sub_nc_i16_e64_dpp v5, v1, v2 row_shl:15
+// GFX11: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+
+v_sub_nc_i16_e64_dpp v5, v1, v2 row_shr:1
+// GFX11: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+
+v_sub_nc_i16_e64_dpp v5, v1, v2 row_shr:15
+// GFX11: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+
+v_sub_nc_i16_e64_dpp v5, v1, v2 row_ror:1
+// GFX11: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+
+v_sub_nc_i16_e64_dpp v5, v1, v2 row_ror:15
+// GFX11: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+
+v_sub_nc_i16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+
+v_sub_nc_i16_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+
+v_sub_nc_i16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
+
+v_sub_nc_i16_e64_dpp v255, v255, v255 clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: [0xff,0x80,0x0e,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30]
+
+v_sub_nc_i32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0]
+// GFX11: [0x05,0x00,0x25,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+
+v_sub_nc_i32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3]
+// GFX11: [0x05,0x00,0x25,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+
+v_sub_nc_i32_e64_dpp v5, v1, v2 row_mirror
+// GFX11: [0x05,0x00,0x25,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+
+v_sub_nc_i32_e64_dpp v5, v1, v2 row_half_mirror
+// GFX11: [0x05,0x00,0x25,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+
+v_sub_nc_i32_e64_dpp v5, v1, v2 row_shl:1
+// GFX11: [0x05,0x00,0x25,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+
+v_sub_nc_i32_e64_dpp v5, v1, v2 row_shl:15
+// GFX11: [0x05,0x00,0x25,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+
+v_sub_nc_i32_e64_dpp v5, v1, v2 row_shr:1
+// GFX11: [0x05,0x00,0x25,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+
+v_sub_nc_i32_e64_dpp v5, v1, v2 row_shr:15
+// GFX11: [0x05,0x00,0x25,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+
+v_sub_nc_i32_e64_dpp v5, v1, v2 row_ror:1
+// GFX11: [0x05,0x00,0x25,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+
+v_sub_nc_i32_e64_dpp v5, v1, v2 row_ror:15
+// GFX11: [0x05,0x00,0x25,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+
+v_sub_nc_i32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x00,0x25,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+
+v_sub_nc_i32_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: [0x05,0x00,0x25,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+
+v_sub_nc_i32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: [0x05,0x00,0x25,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
+
+v_sub_nc_i32_e64_dpp v255, v255, v255 clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: [0xff,0x80,0x25,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30]
+
+v_sub_nc_u16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0]
+// GFX11: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+
+v_sub_nc_u16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3]
+// GFX11: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+
+v_sub_nc_u16_e64_dpp v5, v1, v2 row_mirror
+// GFX11: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+
+v_sub_nc_u16_e64_dpp v5, v1, v2 row_half_mirror
+// GFX11: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+
+v_sub_nc_u16_e64_dpp v5, v1, v2 row_shl:1
+// GFX11: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+
+v_sub_nc_u16_e64_dpp v5, v1, v2 row_shl:15
+// GFX11: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+
+v_sub_nc_u16_e64_dpp v5, v1, v2 row_shr:1
+// GFX11: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+
+v_sub_nc_u16_e64_dpp v5, v1, v2 row_shr:15
+// GFX11: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+
+v_sub_nc_u16_e64_dpp v5, v1, v2 row_ror:1
+// GFX11: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+
+v_sub_nc_u16_e64_dpp v5, v1, v2 row_ror:15
+// GFX11: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+
+v_sub_nc_u16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+
+v_sub_nc_u16_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+
+v_sub_nc_u16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
+
+v_sub_nc_u16_e64_dpp v255, v255, v255 clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: [0xff,0x80,0x04,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30]
+
+v_subrev_co_u32_e64_dpp v5, s6, v1, v2 quad_perm:[3,2,1,0]
+// W32: [0x05,0x06,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_subrev_co_u32_e64_dpp v5, s6, v1, v2 quad_perm:[0,1,2,3]
+// W32: [0x05,0x06,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_subrev_co_u32_e64_dpp v5, s6, v1, v2 row_mirror
+// W32: [0x05,0x06,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_subrev_co_u32_e64_dpp v5, s6, v1, v2 row_half_mirror
+// W32: [0x05,0x06,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_subrev_co_u32_e64_dpp v5, s6, v1, v2 row_shl:1
+// W32: [0x05,0x06,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_subrev_co_u32_e64_dpp v5, s6, v1, v2 row_shl:15
+// W32: [0x05,0x06,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_subrev_co_u32_e64_dpp v5, s6, v1, v2 row_shr:1
+// W32: [0x05,0x06,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_subrev_co_u32_e64_dpp v5, s6, v1, v2 row_shr:15
+// W32: [0x05,0x06,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_subrev_co_u32_e64_dpp v5, s6, v1, v2 row_ror:1
+// W32: [0x05,0x06,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_subrev_co_u32_e64_dpp v5, s105, v1, v2 row_ror:15
+// W32: [0x05,0x69,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_subrev_co_u32_e64_dpp v5, vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W32: [0x05,0x6a,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_subrev_co_u32_e64_dpp v5, vcc_hi, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: [0x05,0x6b,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_subrev_co_u32_e64_dpp v5, ttmp15, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: [0x05,0x7b,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_subrev_co_u32_e64_dpp v5, s[12:13], v1, v2 quad_perm:[3,2,1,0]
+// W64: [0x05,0x0c,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_subrev_co_u32_e64_dpp v5, s[12:13], v1, v2 quad_perm:[0,1,2,3]
+// W64: [0x05,0x0c,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_subrev_co_u32_e64_dpp v5, s[12:13], v1, v2 row_mirror
+// W64: [0x05,0x0c,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_subrev_co_u32_e64_dpp v5, s[12:13], v1, v2 row_half_mirror
+// W64: [0x05,0x0c,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_subrev_co_u32_e64_dpp v5, s[12:13], v1, v2 row_shl:1
+// W64: [0x05,0x0c,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_subrev_co_u32_e64_dpp v5, s[12:13], v1, v2 row_shl:15
+// W64: [0x05,0x0c,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_subrev_co_u32_e64_dpp v5, s[12:13], v1, v2 row_shr:1
+// W64: [0x05,0x0c,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_subrev_co_u32_e64_dpp v5, s[12:13], v1, v2 row_shr:15
+// W64: [0x05,0x0c,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_subrev_co_u32_e64_dpp v5, s[12:13], v1, v2 row_ror:1
+// W64: [0x05,0x0c,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_subrev_co_u32_e64_dpp v5, s[12:13], v1, v2 row_ror:15
+// W64: [0x05,0x0c,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_subrev_co_u32_e64_dpp v5, s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W64: [0x05,0x68,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_subrev_co_u32_e64_dpp v5, vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: [0x05,0x6a,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_subrev_co_u32_e64_dpp v5, ttmp[14:15], v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: [0x05,0x7a,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_subrev_co_u32_e64_dpp v255, null, v255, v255 clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: [0xff,0xfc,0x02,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30]
+
+v_xad_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0]
+// GFX11: [0x05,0x00,0x45,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
+
+v_xad_u32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3]
+// GFX11: [0x05,0x00,0x45,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff]
+
+v_xad_u32_e64_dpp v5, v1, v2, v3 row_mirror
+// GFX11: [0x05,0x00,0x45,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff]
+
+v_xad_u32_e64_dpp v5, v1, v2, v255 row_half_mirror
+// GFX11: [0x05,0x00,0x45,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff]
+
+v_xad_u32_e64_dpp v5, v1, v2, s105 row_shl:1
+// GFX11: [0x05,0x00,0x45,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff]
+
+v_xad_u32_e64_dpp v5, v1, v2, vcc_hi row_shl:15
+// GFX11: [0x05,0x00,0x45,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff]
+
+v_xad_u32_e64_dpp v5, v1, v2, vcc_lo row_shr:1
+// GFX11: [0x05,0x00,0x45,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff]
+
+v_xad_u32_e64_dpp v5, v1, v2, ttmp15 row_shr:15
+// GFX11: [0x05,0x00,0x45,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff]
+
+v_xad_u32_e64_dpp v5, v1, v2, exec_hi row_ror:1
+// GFX11: [0x05,0x00,0x45,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff]
+
+v_xad_u32_e64_dpp v5, v1, v2, exec_lo row_ror:15
+// GFX11: [0x05,0x00,0x45,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff]
+
+v_xad_u32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x00,0x45,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff]
+
+v_xad_u32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: [0x05,0x00,0x45,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01]
+
+v_xad_u32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: [0x05,0x00,0x45,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13]
+
+v_xad_u32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: [0xff,0x00,0x45,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30]
+
+v_xor3_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0]
+// GFX11: [0x05,0x00,0x40,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
+
+v_xor3_b32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3]
+// GFX11: [0x05,0x00,0x40,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff]
+
+v_xor3_b32_e64_dpp v5, v1, v2, v3 row_mirror
+// GFX11: [0x05,0x00,0x40,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff]
+
+v_xor3_b32_e64_dpp v5, v1, v2, v255 row_half_mirror
+// GFX11: [0x05,0x00,0x40,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff]
+
+v_xor3_b32_e64_dpp v5, v1, v2, s105 row_shl:1
+// GFX11: [0x05,0x00,0x40,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff]
+
+v_xor3_b32_e64_dpp v5, v1, v2, vcc_hi row_shl:15
+// GFX11: [0x05,0x00,0x40,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff]
+
+v_xor3_b32_e64_dpp v5, v1, v2, vcc_lo row_shr:1
+// GFX11: [0x05,0x00,0x40,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff]
+
+v_xor3_b32_e64_dpp v5, v1, v2, ttmp15 row_shr:15
+// GFX11: [0x05,0x00,0x40,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff]
+
+v_xor3_b32_e64_dpp v5, v1, v2, exec_hi row_ror:1
+// GFX11: [0x05,0x00,0x40,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff]
+
+v_xor3_b32_e64_dpp v5, v1, v2, exec_lo row_ror:15
+// GFX11: [0x05,0x00,0x40,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff]
+
+v_xor3_b32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x00,0x40,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff]
+
+v_xor3_b32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: [0x05,0x00,0x40,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01]
+
+v_xor3_b32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: [0x05,0x00,0x40,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13]
+
+v_xor3_b32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: [0xff,0x00,0x40,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30]
+
+v_xor_b16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0]
+// GFX11: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+
+v_xor_b16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3]
+// GFX11: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+
+v_xor_b16_e64_dpp v5, v1, v2 row_mirror
+// GFX11: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+
+v_xor_b16_e64_dpp v5, v1, v2 row_half_mirror
+// GFX11: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+
+v_xor_b16_e64_dpp v5, v1, v2 row_shl:1
+// GFX11: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+
+v_xor_b16_e64_dpp v5, v1, v2 row_shl:15
+// GFX11: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+
+v_xor_b16_e64_dpp v5, v1, v2 row_shr:1
+// GFX11: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+
+v_xor_b16_e64_dpp v5, v1, v2 row_shr:15
+// GFX11: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+
+v_xor_b16_e64_dpp v5, v1, v2 row_ror:1
+// GFX11: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+
+v_xor_b16_e64_dpp v5, v1, v2 row_ror:15
+// GFX11: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+
+v_xor_b16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+
+v_xor_b16_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+
+v_xor_b16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
+
+v_xor_b16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: [0xff,0x00,0x64,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30]
+
+v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x58,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+
+v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: [0x05,0x08,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+
+v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3
+// GFX11: [0x05,0x10,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
+
+v_add_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1
+// GFX11: [0xff,0xc0,0x0d,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
+
+v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x58,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+
+v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: [0x05,0x08,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+
+v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3
+// GFX11: [0x05,0x10,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
+
+v_add_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1
+// GFX11: [0xff,0xc0,0x03,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
+
+v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3
+// GFX11: [0x05,0x0a,0x12,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+
+v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1
+// GFX11: [0xff,0x13,0x12,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+
+v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3
+// GFX11: [0x05,0x0a,0x13,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+
+v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1
+// GFX11: [0xff,0x13,0x13,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+
+v_div_fixup_f16_e64_dpp v5, -v1, v2, |exec_lo| op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x7c,0x54,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff]
+
+v_div_fixup_f16_e64_dpp v5, -|v1|, -|v2|, null op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x0b,0x54,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff]
+
+v_div_fixup_f16_e64_dpp v5, -|v1|, v2, -|-1| op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: [0x05,0x15,0x54,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01]
+
+v_div_fixup_f16_e64_dpp v5, v1, -|v2|, -|0.5| op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3
+// GFX11: [0x05,0x26,0x54,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x01,0x13]
+
+v_div_fixup_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1
+// GFX11: [0xff,0xc7,0x54,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30]
+
+v_fma_f16_e64_dpp v5, -v1, v2, |exec_lo| op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x7c,0x48,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff]
+
+v_fma_f16_e64_dpp v5, -|v1|, -|v2|, null op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x0b,0x48,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff]
+
+v_fma_f16_e64_dpp v5, -|v1|, v2, -|-1| op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: [0x05,0x15,0x48,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01]
+
+v_fma_f16_e64_dpp v5, v1, -|v2|, -|0.5| op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3
+// GFX11: [0x05,0x26,0x48,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x01,0x13]
+
+v_fma_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1
+// GFX11: [0xff,0xc7,0x48,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30]
+
+v_mad_i16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x78,0x53,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff]
+
+v_mad_i16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x08,0x53,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff]
+
+v_mad_i16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: [0x05,0x10,0x53,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01]
+
+v_mad_i16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3
+// GFX11: [0x05,0x20,0x53,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13]
+
+v_mad_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1
+// GFX11: [0xff,0xc0,0x53,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30]
+
+v_mad_i32_i16_e64_dpp v5, v1, v2, 0.5 op_sel:[1,0,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3
+// GFX11: [0x05,0x08,0x5a,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13]
+
+v_mad_i32_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,1,0,0] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1
+// GFX11: [0xff,0x90,0x5a,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30]
+
+v_mad_u16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x78,0x41,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff]
+
+v_mad_u16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x08,0x41,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff]
+
+v_mad_u16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: [0x05,0x10,0x41,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01]
+
+v_mad_u16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3
+// GFX11: [0x05,0x20,0x41,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13]
+
+v_mad_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1
+// GFX11: [0xff,0xc0,0x41,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30]
+
+v_mad_u32_u16_e64_dpp v5, v1, v2, 0.5 op_sel:[1,0,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3
+// GFX11: [0x05,0x08,0x59,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13]
+
+v_mad_u32_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,1,0,0] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1
+// GFX11: [0xff,0x90,0x59,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30]
+
+v_max3_f16_e64_dpp v5, -v1, v2, |exec_lo| op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x7c,0x4c,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff]
+
+v_max3_f16_e64_dpp v5, -|v1|, -|v2|, null op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x0b,0x4c,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff]
+
+v_max3_f16_e64_dpp v5, -|v1|, v2, -|-1| op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: [0x05,0x15,0x4c,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01]
+
+v_max3_f16_e64_dpp v5, v1, -|v2|, -|0.5| op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3
+// GFX11: [0x05,0x26,0x4c,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x01,0x13]
+
+v_max3_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1
+// GFX11: [0xff,0xc7,0x4c,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30]
+
+v_max3_i16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x78,0x4d,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff]
+
+v_max3_i16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x08,0x4d,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff]
+
+v_max3_i16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: [0x05,0x10,0x4d,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01]
+
+v_max3_i16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3
+// GFX11: [0x05,0x20,0x4d,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13]
+
+v_max3_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1
+// GFX11: [0xff,0x40,0x4d,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30]
+
+v_max3_u16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x78,0x4e,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff]
+
+v_max3_u16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x08,0x4e,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff]
+
+v_max3_u16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: [0x05,0x10,0x4e,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01]
+
+v_max3_u16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3
+// GFX11: [0x05,0x20,0x4e,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13]
+
+v_max3_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1
+// GFX11: [0xff,0x40,0x4e,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30]
+
+v_med3_f16_e64_dpp v5, -v1, v2, |exec_lo| op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x7c,0x4f,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff]
+
+v_med3_f16_e64_dpp v5, -|v1|, -|v2|, null op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x0b,0x4f,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff]
+
+v_med3_f16_e64_dpp v5, -|v1|, v2, -|-1| op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: [0x05,0x15,0x4f,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01]
+
+v_med3_f16_e64_dpp v5, v1, -|v2|, -|0.5| op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3
+// GFX11: [0x05,0x26,0x4f,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x01,0x13]
+
+v_med3_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1
+// GFX11: [0xff,0xc7,0x4f,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30]
+
+v_med3_i16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x78,0x50,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff]
+
+v_med3_i16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x08,0x50,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff]
+
+v_med3_i16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: [0x05,0x10,0x50,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01]
+
+v_med3_i16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3
+// GFX11: [0x05,0x20,0x50,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13]
+
+v_med3_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1
+// GFX11: [0xff,0x40,0x50,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30]
+
+v_med3_u16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x78,0x51,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff]
+
+v_med3_u16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x08,0x51,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff]
+
+v_med3_u16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: [0x05,0x10,0x51,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01]
+
+v_med3_u16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3
+// GFX11: [0x05,0x20,0x51,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13]
+
+v_med3_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1
+// GFX11: [0xff,0x40,0x51,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30]
+
+v_min3_f16_e64_dpp v5, -v1, v2, |exec_lo| op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x7c,0x49,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff]
+
+v_min3_f16_e64_dpp v5, -|v1|, -|v2|, null op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x0b,0x49,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff]
+
+v_min3_f16_e64_dpp v5, -|v1|, v2, -|-1| op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: [0x05,0x15,0x49,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01]
+
+v_min3_f16_e64_dpp v5, v1, -|v2|, -|0.5| op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3
+// GFX11: [0x05,0x26,0x49,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x01,0x13]
+
+v_min3_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1
+// GFX11: [0xff,0xc7,0x49,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30]
+
+v_min3_i16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x78,0x4a,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff]
+
+v_min3_i16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x08,0x4a,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff]
+
+v_min3_i16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: [0x05,0x10,0x4a,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01]
+
+v_min3_i16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3
+// GFX11: [0x05,0x20,0x4a,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13]
+
+v_min3_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1
+// GFX11: [0xff,0x40,0x4a,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30]
+
+v_min3_u16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x78,0x4b,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff]
+
+v_min3_u16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x08,0x4b,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff]
+
+v_min3_u16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: [0x05,0x10,0x4b,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01]
+
+v_min3_u16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3
+// GFX11: [0x05,0x20,0x4b,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13]
+
+v_min3_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1
+// GFX11: [0xff,0x40,0x4b,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30]
+
+v_pack_b32_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3
+// GFX11: [0x05,0x0a,0x11,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+
+v_pack_b32_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1
+// GFX11: [0xff,0x13,0x11,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+
+v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x58,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+
+v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: [0x05,0x08,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+
+v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3
+// GFX11: [0x05,0x10,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
+
+v_sub_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1
+// GFX11: [0xff,0xc0,0x0e,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
+
+v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: [0x05,0x58,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+
+v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: [0x05,0x08,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+
+v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3
+// GFX11: [0x05,0x10,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
+
+v_sub_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1
+// GFX11: [0xff,0xc0,0x04,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
+
+v_dot2_f16_f16_e64_dpp v0, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1
+// GFX11: encoding: [0x00,0x00,0x66,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x04,0x00]
+
+v_dot2_f16_f16_e64_dpp v0, v1, v2, v3 op_sel:[1,1,0,0] quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1
+// GFX11-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid op_sel operand
+
+v_dot2_f16_f16_e64_dpp v0, s1, v2, v3 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1
+// GFX11-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_dot2_f16_f16_e64_dpp v0, v1, s2, v3 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1
+// GFX11-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_dot2_f16_f16_e64_dpp v0, v1, v2, v3 op_sel:[0,0,1,1] quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1
+// GFX11: encoding: [0x00,0x60,0x66,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x04,0x00]
+
+v_dot2_f16_f16_e64_dpp v0, |v1|, -v2, -|s3| op_sel:[0,0,1,1] quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1
+// GFX11: encoding: [0x00,0x65,0x66,0xd6,0xfa,0x04,0x0e,0xc0,0x01,0xe4,0x04,0x00]
+
+v_dot2_f16_f16_e64_dpp v5, v1, v2, 0.5 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
+// GFX11: encoding: [0x05,0x00,0x66,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x1b,0x00,0xff]
+
+v_dot2_bf16_bf16_e64_dpp v0, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1
+// GFX11: encoding: [0x00,0x00,0x67,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x04,0x00]
+
+v_dot2_bf16_bf16_e64_dpp v0, v1, v2, v3 op_sel:[1,1,0,0] quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1
+// GFX11-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid op_sel operand
+
+v_dot2_bf16_bf16_e64_dpp v0, s1, v2, v3 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
+// GFX11-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_dot2_bf16_bf16_e64_dpp v0, v1, s2, v3 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
+// GFX11-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_dot2_bf16_bf16_e64_dpp v0, v1, v2, v3 op_sel:[0,0,1,1] quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1
+// GFX11: encoding: [0x00,0x60,0x67,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x04,0x00]
+
+v_dot2_bf16_bf16_e64_dpp v0, |v1|, -v2, -|s3| op_sel:[0,0,1,1] quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1
+// GFX11: encoding: [0x00,0x65,0x67,0xd6,0xfa,0x04,0x0e,0xc0,0x01,0xe4,0x04,0x00]
+
+v_dot2_bf16_bf16_e64_dpp v5, v1, v2, 0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
+// GFX11: encoding: [0x05,0x00,0x67,0xd6,0xfa,0x04,0x02,0x02,0x01,0x1b,0x00,0xff]
diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16.s
index 58fec38cf57fb..ceb8cac21f5aa 100644
--- a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16.s
+++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16.s
@@ -1,7 +1,7 @@
-// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32 -show-encoding %s | FileCheck --check-prefixes=GFX11,W32 %s
-// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64 -show-encoding %s | FileCheck --check-prefixes=GFX11,W64 %s
-// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32 %s 2>&1 | FileCheck --check-prefixes=GFX11-ERR,W32-ERR --implicit-check-not=error: %s
-// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64 %s 2>&1 | FileCheck --check-prefixes=GFX11-ERR,W64-ERR --implicit-check-not=error: %s
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,+real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX11,W32 %s
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,+real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX11,W64 %s
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,+real-true16 %s 2>&1 | FileCheck --check-prefixes=GFX11-ERR,W32-ERR --implicit-check-not=error: %s
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,+real-true16 %s 2>&1 | FileCheck --check-prefixes=GFX11-ERR,W64-ERR --implicit-check-not=error: %s
 
 v_add3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0]
 // GFX11: [0x05,0x00,0x55,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8-fake16.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8-fake16.s new file mode 100644 index 0000000000000..cf2a7ab7ef76c --- /dev/null +++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8-fake16.s @@ -0,0 +1,2968 @@ +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,-real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX11,W32 %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,-real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX11,W64 %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,-real-true16 %s 2>&1 | FileCheck --check-prefixes=GFX11-ERR,W32-ERR --implicit-check-not=error: %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,-real-true16 %s 2>&1 | FileCheck --check-prefixes=GFX11-ERR,W64-ERR --implicit-check-not=error: %s + +v_add3_u32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x55,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +v_add3_u32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x55,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +v_add3_u32_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x55,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] + +v_add3_u32_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x55,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] + +v_add3_u32_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x55,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] + +v_add3_u32_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x55,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] + +v_add3_u32_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x55,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] + +v_add3_u32_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x55,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] + +v_add3_u32_e64_dpp v5, v1, v2, null dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x55,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] + +v_add3_u32_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x55,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] + +v_add3_u32_e64_dpp v5, v1, v2, 0.5 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0x55,0xd6,0xea,0x04,0xc2,0x03,0x01,0x77,0x39,0x05] + +v_add3_u32_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x00,0x55,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] + +v_add_co_u32_e64_dpp v5, s6, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: [0x05,0x06,0x00,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_u32_e64_dpp v5, s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: [0x05,0x69,0x00,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_u32_e64_dpp v5, vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: [0x05,0x6a,0x00,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_u32_e64_dpp v5, vcc_hi, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: [0x05,0x6b,0x00,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_u32_e64_dpp v5, ttmp15, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W32: [0x05,0x7b,0x00,0xd7,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_u32_e64_dpp v5, s[12:13], v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: [0x05,0x0c,0x00,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_u32_e64_dpp v5, s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: [0x05,0x68,0x00,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_u32_e64_dpp v5, vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: [0x05,0x6a,0x00,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_u32_e64_dpp v5, ttmp[14:15], v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W64: [0x05,0x7a,0x00,0xd7,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_u32_e64_dpp v255, null, v255, v255 clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0xfc,0x00,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] + +v_add_lshl_u32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x47,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +v_add_lshl_u32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x47,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +v_add_lshl_u32_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x47,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] + +v_add_lshl_u32_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x47,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] + +v_add_lshl_u32_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x47,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] + +v_add_lshl_u32_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x47,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] + +v_add_lshl_u32_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x47,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] + +v_add_lshl_u32_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x47,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] + +v_add_lshl_u32_e64_dpp v5, v1, v2, null dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x47,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] + +v_add_lshl_u32_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x47,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] + +v_add_lshl_u32_e64_dpp v5, v1, v2, 0.5 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0x47,0xd6,0xea,0x04,0xc2,0x03,0x01,0x77,0x39,0x05] + +v_add_lshl_u32_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x00,0x47,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] + +v_add_nc_i16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_add_nc_i16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0x0d,0xd7,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_add_nc_i16_e64_dpp v255, v255, v255 clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x80,0x0d,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] + +v_add_nc_i32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x26,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_add_nc_i32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0x26,0xd7,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_add_nc_i32_e64_dpp v255, v255, v255 clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: 
[0xff,0x80,0x26,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] + +v_add_nc_u16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_add_nc_u16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0x03,0xd7,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_add_nc_u16_e64_dpp v255, v255, v255 clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x80,0x03,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] + +v_alignbit_b32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x16,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +v_alignbit_b32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x16,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +v_alignbit_b32_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x16,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] + +v_alignbit_b32_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x16,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] + +v_alignbit_b32_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x16,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] + +v_alignbit_b32_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x16,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] + +v_alignbit_b32_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x16,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] + +v_alignbit_b32_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x16,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] + +v_alignbit_b32_e64_dpp v5, v1, v2, null dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x16,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] + +v_alignbit_b32_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0x16,0xd6,0xea,0x04,0x06,0x03,0x01,0x77,0x39,0x05] + +v_alignbit_b32_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x00,0x16,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] + +v_alignbyte_b32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x17,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +v_alignbyte_b32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x17,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +v_alignbyte_b32_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x17,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] + +v_alignbyte_b32_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x17,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] + +v_alignbyte_b32_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x17,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] + +v_alignbyte_b32_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x17,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] + +v_alignbyte_b32_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x17,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] + +v_alignbyte_b32_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x17,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] + +v_alignbyte_b32_e64_dpp v5, v1, v2, null dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x17,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] + +v_alignbyte_b32_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0x17,0xd6,0xea,0x04,0x06,0x03,0x01,0x77,0x39,0x05] + +v_alignbyte_b32_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: 
[0xff,0x00,0x17,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] + +v_and_b16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x62,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_and_b16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0x62,0xd7,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_and_b16_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x00,0x62,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] + +v_and_or_b32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x57,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +v_and_or_b32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x57,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +v_and_or_b32_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x57,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] + +v_and_or_b32_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x57,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] + +v_and_or_b32_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x57,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] + +v_and_or_b32_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x57,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] + +v_and_or_b32_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x57,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] + +v_and_or_b32_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x57,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] + +v_and_or_b32_e64_dpp v5, v1, v2, null dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x57,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] + +v_and_or_b32_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x57,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] + +v_and_or_b32_e64_dpp v5, v1, v2, 0.5 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0x57,0xd6,0xea,0x04,0xc2,0x03,0x01,0x77,0x39,0x05] + +v_and_or_b32_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x00,0x57,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] + +v_ashrrev_i16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x3a,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_ashrrev_i16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0x3a,0xd7,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_ashrrev_i16_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x00,0x3a,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] + +v_bcnt_u32_b32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x1e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_bcnt_u32_b32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0x1e,0xd7,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_bcnt_u32_b32_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x00,0x1e,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] + +v_bfe_i32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x11,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +v_bfe_i32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x11,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +v_bfe_i32_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x11,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] + +v_bfe_i32_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x11,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] + +v_bfe_i32_e64_dpp v5, v1, v2, vcc_lo 
dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x11,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] + +v_bfe_i32_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x11,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] + +v_bfe_i32_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x11,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] + +v_bfe_i32_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x11,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] + +v_bfe_i32_e64_dpp v5, v1, v2, null dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x11,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] + +v_bfe_i32_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x11,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] + +v_bfe_i32_e64_dpp v5, v1, v2, 0.5 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0x11,0xd6,0xea,0x04,0xc2,0x03,0x01,0x77,0x39,0x05] + +v_bfe_i32_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x00,0x11,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] + +v_bfe_u32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x10,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +v_bfe_u32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x10,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +v_bfe_u32_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x10,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] + +v_bfe_u32_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x10,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] + +v_bfe_u32_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x10,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] + +v_bfe_u32_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x10,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] + +v_bfe_u32_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x10,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] + +v_bfe_u32_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x10,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] + +v_bfe_u32_e64_dpp v5, v1, v2, null dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x10,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] + +v_bfe_u32_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x10,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] + +v_bfe_u32_e64_dpp v5, v1, v2, 0.5 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0x10,0xd6,0xea,0x04,0xc2,0x03,0x01,0x77,0x39,0x05] + +v_bfe_u32_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x00,0x10,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] + +v_bfi_b32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x12,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +v_bfi_b32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x12,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +v_bfi_b32_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x12,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] + +v_bfi_b32_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x12,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] + +v_bfi_b32_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x12,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] + +v_bfi_b32_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x12,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] + +v_bfi_b32_e64_dpp v5, v1, v2, exec_hi 
dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x12,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] + +v_bfi_b32_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x12,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] + +v_bfi_b32_e64_dpp v5, v1, v2, null dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x12,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] + +v_bfi_b32_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x12,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] + +v_bfi_b32_e64_dpp v5, v1, v2, 0.5 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0x12,0xd6,0xea,0x04,0xc2,0x03,0x01,0x77,0x39,0x05] + +v_bfi_b32_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x00,0x12,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] + +v_bfm_b32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x1d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_bfm_b32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0x1d,0xd7,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_bfm_b32_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x00,0x1d,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] + +v_cndmask_b16_e64_dpp v5, v1, v2, s3 dpp8:[7,6,5,4,3,2,1,0] +// W32: [0x05,0x00,0x5d,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] +// W32: [0x05,0x00,0x5d,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] +// W32: [0x05,0x00,0x5d,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b16_e64_dpp v5, |v1|, -v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// W32: [0x05,0x01,0x5d,0xd6,0xe9,0x04,0xaa,0x41,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b16_e64_dpp v5, -v1, |v2|, ttmp15 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W32: [0x05,0x02,0x5d,0xd6,0xea,0x04,0xee,0x21,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] dpp8:[7,6,5,4,3,2,1,0] +// W64: [0x05,0x00,0x5d,0xd6,0xe9,0x04,0x1a,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b16_e64_dpp v5, v1, v2, s[104:105] dpp8:[7,6,5,4,3,2,1,0] +// W64: [0x05,0x00,0x5d,0xd6,0xe9,0x04,0xa2,0x01,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b16_e64_dpp v5, |v1|, -v2, vcc dpp8:[7,6,5,4,3,2,1,0] +// W64: [0x05,0x01,0x5d,0xd6,0xe9,0x04,0xaa,0x41,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b16_e64_dpp v5, -v1, |v2|, ttmp[14:15] dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W64: [0x05,0x02,0x5d,0xd6,0xea,0x04,0xea,0x21,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b16_e64_dpp v255, -|v255|, -|v255|, null dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x03,0x5d,0xd6,0xe9,0xfe,0xf3,0x61,0xff,0x00,0x00,0x00] + +v_cubeid_f32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x0c,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +v_cubeid_f32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: 
[0x05,0x00,0x0c,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +v_cubeid_f32_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x0c,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] + +v_cubeid_f32_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x0c,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] + +v_cubeid_f32_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x0c,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] + +v_cubeid_f32_e64_dpp v5, |v1|, v2, -ttmp15 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x01,0x0c,0xd6,0xe9,0x04,0xee,0x81,0x01,0x77,0x39,0x05] + +v_cubeid_f32_e64_dpp v5, v1, -|v2|, exec_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x02,0x0c,0xd6,0xe9,0x04,0xfe,0x41,0x01,0x77,0x39,0x05] + +v_cubeid_f32_e64_dpp v5, -v1, v2, |exec_lo| dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x04,0x0c,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05] + +v_cubeid_f32_e64_dpp v5, -|v1|, -|v2|, null dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x03,0x0c,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05] + +v_cubeid_f32_e64_dpp v5, -|v1|, v2, -|-1| mul:2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x05,0x0c,0xd6,0xe9,0x04,0x06,0xab,0x01,0x77,0x39,0x05] + +v_cubeid_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x06,0x0c,0xd6,0xea,0x04,0xc2,0xd3,0x01,0x77,0x39,0x05] + +v_cubeid_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x87,0x0c,0xd6,0xe9,0xfe,0xf7,0xfb,0xff,0x00,0x00,0x00] + +v_cubema_f32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x0f,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +v_cubema_f32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x0f,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +v_cubema_f32_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x0f,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] + +v_cubema_f32_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x0f,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] + +v_cubema_f32_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x0f,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] + +v_cubema_f32_e64_dpp v5, |v1|, v2, -ttmp15 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x01,0x0f,0xd6,0xe9,0x04,0xee,0x81,0x01,0x77,0x39,0x05] + +v_cubema_f32_e64_dpp v5, v1, -|v2|, exec_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x02,0x0f,0xd6,0xe9,0x04,0xfe,0x41,0x01,0x77,0x39,0x05] + +v_cubema_f32_e64_dpp v5, -v1, v2, |exec_lo| dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x04,0x0f,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05] + +v_cubema_f32_e64_dpp v5, -|v1|, -|v2|, null dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x03,0x0f,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05] + +v_cubema_f32_e64_dpp v5, -|v1|, v2, -|-1| mul:2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x05,0x0f,0xd6,0xe9,0x04,0x06,0xab,0x01,0x77,0x39,0x05] + +v_cubema_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x06,0x0f,0xd6,0xea,0x04,0xc2,0xd3,0x01,0x77,0x39,0x05] + +v_cubema_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x87,0x0f,0xd6,0xe9,0xfe,0xf7,0xfb,0xff,0x00,0x00,0x00] + +v_cubesc_f32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x0d,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +v_cubesc_f32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x0d,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +v_cubesc_f32_e64_dpp v5, v1, 
v2, s105 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x0d,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] + +v_cubesc_f32_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x0d,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] + +v_cubesc_f32_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x0d,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] + +v_cubesc_f32_e64_dpp v5, |v1|, v2, -ttmp15 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x01,0x0d,0xd6,0xe9,0x04,0xee,0x81,0x01,0x77,0x39,0x05] + +v_cubesc_f32_e64_dpp v5, v1, -|v2|, exec_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x02,0x0d,0xd6,0xe9,0x04,0xfe,0x41,0x01,0x77,0x39,0x05] + +v_cubesc_f32_e64_dpp v5, -v1, v2, |exec_lo| dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x04,0x0d,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05] + +v_cubesc_f32_e64_dpp v5, -|v1|, -|v2|, null dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x03,0x0d,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05] + +v_cubesc_f32_e64_dpp v5, -|v1|, v2, -|-1| mul:2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x05,0x0d,0xd6,0xe9,0x04,0x06,0xab,0x01,0x77,0x39,0x05] + +v_cubesc_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x06,0x0d,0xd6,0xea,0x04,0xc2,0xd3,0x01,0x77,0x39,0x05] + +v_cubesc_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x87,0x0d,0xd6,0xe9,0xfe,0xf7,0xfb,0xff,0x00,0x00,0x00] + +v_cubetc_f32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x0e,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +v_cubetc_f32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x0e,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +v_cubetc_f32_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x0e,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] + +v_cubetc_f32_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x0e,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] + +v_cubetc_f32_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x0e,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] + +v_cubetc_f32_e64_dpp v5, |v1|, v2, -ttmp15 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x01,0x0e,0xd6,0xe9,0x04,0xee,0x81,0x01,0x77,0x39,0x05] + +v_cubetc_f32_e64_dpp v5, v1, -|v2|, exec_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x02,0x0e,0xd6,0xe9,0x04,0xfe,0x41,0x01,0x77,0x39,0x05] + +v_cubetc_f32_e64_dpp v5, -v1, v2, |exec_lo| dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x04,0x0e,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05] + +v_cubetc_f32_e64_dpp v5, -|v1|, -|v2|, null dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x03,0x0e,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05] + +v_cubetc_f32_e64_dpp v5, -|v1|, v2, -|-1| mul:2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x05,0x0e,0xd6,0xe9,0x04,0x06,0xab,0x01,0x77,0x39,0x05] + +v_cubetc_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x06,0x0e,0xd6,0xea,0x04,0xc2,0xd3,0x01,0x77,0x39,0x05] + +v_cubetc_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x87,0x0e,0xd6,0xe9,0xfe,0xf7,0xfb,0xff,0x00,0x00,0x00] + +v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x06,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_cvt_pk_i16_f32_e64_dpp v5, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x01,0x06,0xd7,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] + +v_cvt_pk_i16_f32_e64_dpp v5, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: 
[0x05,0x02,0x06,0xd7,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05] + +v_cvt_pk_i16_f32_e64_dpp v255, -|v255|, -|v255| dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x03,0x06,0xd7,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] + +v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x24,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0x24,0xd7,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_cvt_pk_i16_i32_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x00,0x24,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] + +v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x12,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_cvt_pk_norm_i16_f16_e64_dpp v5, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x01,0x12,0xd7,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] + +v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x02,0x12,0xd7,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05] + +v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255|, -|v255| dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x03,0x12,0xd7,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] + +v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x13,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_cvt_pk_norm_u16_f16_e64_dpp v5, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x01,0x13,0xd7,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] + +v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x02,0x13,0xd7,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05] + +v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255|, -|v255| dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x03,0x13,0xd7,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] + +v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x07,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_cvt_pk_u16_f32_e64_dpp v5, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x01,0x07,0xd7,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] + +v_cvt_pk_u16_f32_e64_dpp v5, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x02,0x07,0xd7,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05] + +v_cvt_pk_u16_f32_e64_dpp v255, -|v255|, -|v255| dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x03,0x07,0xd7,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] + +v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x23,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0x23,0xd7,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_cvt_pk_u16_u32_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x00,0x23,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] + +v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x26,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x26,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x26,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] + +v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x26,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] + +v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x26,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] + +v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, ttmp15 
dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x26,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] + +v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x26,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] + +v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x26,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] + +v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, null dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x26,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] + +v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x26,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] + +v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, 0.5 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0x26,0xd6,0xea,0x04,0xc2,0x03,0x01,0x77,0x39,0x05] + +v_cvt_pk_u8_f32_e64_dpp v255, -|v255|, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x01,0x26,0xd6,0xe9,0xfe,0xf7,0x23,0xff,0x00,0x00,0x00] + +v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x12,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_cvt_pk_norm_i16_f16_e64_dpp v5, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x01,0x12,0xd7,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] + +v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x02,0x12,0xd7,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05] + +v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255|, -|v255| dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x03,0x12,0xd7,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] + +v_cvt_pk_norm_i16_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x21,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_cvt_pk_norm_i16_f32_e64_dpp v5, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x01,0x21,0xd7,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] + +v_cvt_pk_norm_i16_f32_e64_dpp v5, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x02,0x21,0xd7,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05] + +v_cvt_pk_norm_i16_f32_e64_dpp v255, -|v255|, -|v255| dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x03,0x21,0xd7,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] + +v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x13,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_cvt_pk_norm_u16_f16_e64_dpp v5, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x01,0x13,0xd7,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] + +v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x02,0x13,0xd7,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05] + +v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255|, -|v255| dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x03,0x13,0xd7,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] + +v_cvt_pk_norm_u16_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x22,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_cvt_pk_norm_u16_f32_e64_dpp v5, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x01,0x22,0xd7,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] + +v_cvt_pk_norm_u16_f32_e64_dpp v5, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x02,0x22,0xd7,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05] + +v_cvt_pk_norm_u16_f32_e64_dpp v255, -|v255|, -|v255| dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x03,0x22,0xd7,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] + +v_div_fixup_f16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x54,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +v_div_fixup_f16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: 
[0x05,0x00,0x54,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +v_div_fixup_f16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x54,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] + +v_div_fixup_f16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x54,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] + +v_div_fixup_f16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x54,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] + +v_div_fixup_f16_e64_dpp v5, |v1|, v2, -ttmp15 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x01,0x54,0xd6,0xe9,0x04,0xee,0x81,0x01,0x77,0x39,0x05] + +v_div_fixup_f16_e64_dpp v5, v1, -|v2|, exec_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x02,0x54,0xd6,0xe9,0x04,0xfe,0x41,0x01,0x77,0x39,0x05] + +v_div_fixup_f16_e64_dpp v5, -v1, v2, |exec_lo| dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x04,0x54,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05] + +v_div_fixup_f16_e64_dpp v5, -|v1|, -|v2|, null dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x03,0x54,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05] + +v_div_fixup_f16_e64_dpp v5, -|v1|, v2, -|-1| dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x05,0x54,0xd6,0xe9,0x04,0x06,0xa3,0x01,0x77,0x39,0x05] + +v_div_fixup_f16_e64_dpp v5, v1, -|v2|, -|0.5| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x06,0x54,0xd6,0xea,0x04,0xc2,0xc3,0x01,0x77,0x39,0x05] + +v_div_fixup_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x87,0x54,0xd6,0xe9,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00] + +v_fma_f16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x48,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +v_fma_f16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x48,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +v_fma_f16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x48,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] + +v_fma_f16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x48,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] + +v_fma_f16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x48,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] + +v_fma_f16_e64_dpp v5, |v1|, v2, -ttmp15 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x01,0x48,0xd6,0xe9,0x04,0xee,0x81,0x01,0x77,0x39,0x05] + +v_fma_f16_e64_dpp v5, v1, -|v2|, exec_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x02,0x48,0xd6,0xe9,0x04,0xfe,0x41,0x01,0x77,0x39,0x05] + +v_fma_f16_e64_dpp v5, -v1, v2, |exec_lo| dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x04,0x48,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05] + +v_fma_f16_e64_dpp v5, -|v1|, -|v2|, null dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x03,0x48,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05] + +v_fma_f16_e64_dpp v5, -|v1|, v2, -|-1| dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x05,0x48,0xd6,0xe9,0x04,0x06,0xa3,0x01,0x77,0x39,0x05] + +v_fma_f16_e64_dpp v5, v1, -|v2|, -|0.5| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x06,0x48,0xd6,0xea,0x04,0xc2,0xc3,0x01,0x77,0x39,0x05] + +v_fma_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x87,0x48,0xd6,0xe9,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00] + +v_fma_f32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x13,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +v_fma_f32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x13,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +v_fma_f32_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: 
[0x05,0x00,0x13,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] + +v_fma_f32_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x13,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] + +v_fma_f32_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x13,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] + +v_fma_f32_e64_dpp v5, |v1|, v2, -ttmp15 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x01,0x13,0xd6,0xe9,0x04,0xee,0x81,0x01,0x77,0x39,0x05] + +v_fma_f32_e64_dpp v5, v1, -|v2|, exec_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x02,0x13,0xd6,0xe9,0x04,0xfe,0x41,0x01,0x77,0x39,0x05] + +v_fma_f32_e64_dpp v5, -v1, v2, |exec_lo| dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x04,0x13,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05] + +v_fma_f32_e64_dpp v5, -|v1|, -|v2|, null dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x03,0x13,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05] + +v_fma_f32_e64_dpp v5, -|v1|, v2, -|-1| mul:2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x05,0x13,0xd6,0xe9,0x04,0x06,0xab,0x01,0x77,0x39,0x05] + +v_fma_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x06,0x13,0xd6,0xea,0x04,0xc2,0xd3,0x01,0x77,0x39,0x05] + +v_fma_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x87,0x13,0xd6,0xe9,0xfe,0xf7,0xfb,0xff,0x00,0x00,0x00] + +v_ldexp_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x1c,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_ldexp_f32_e64_dpp v5, v1, v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x1c,0xd7,0xe9,0x04,0x02,0x08,0x01,0x77,0x39,0x05] + +v_ldexp_f32_e64_dpp v5, v1, v2 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0x1c,0xd7,0xea,0x04,0x02,0x10,0x01,0x77,0x39,0x05] + +v_ldexp_f32_e64_dpp v255, -|v255|, v255 clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x81,0x1c,0xd7,0xe9,0xfe,0x03,0x38,0xff,0x00,0x00,0x00] + +v_lerp_u8_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x15,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +v_lerp_u8_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x15,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +v_lerp_u8_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x15,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] + +v_lerp_u8_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x15,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] + +v_lerp_u8_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x15,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] + +v_lerp_u8_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x15,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] + +v_lerp_u8_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x15,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] + +v_lerp_u8_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x15,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] + +v_lerp_u8_e64_dpp v5, v1, v2, null dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x15,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] + +v_lerp_u8_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x15,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] + +v_lerp_u8_e64_dpp v5, v1, v2, 0.5 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0x15,0xd6,0xea,0x04,0xc2,0x03,0x01,0x77,0x39,0x05] + +v_lerp_u8_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x00,0x15,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] + 
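Two encoding fields are directly visible across the dpp8 cases in this file: byte 4 is 0xe9 for a plain dpp8 operand and 0xea whenever fi:1 is present, and the last three bytes pack the eight 3-bit lane selectors with selector 0 in the lowest bits, so dpp8:[7,6,5,4,3,2,1,0] always ends in 0x77,0x39,0x05 and dpp8:[0,0,0,0,0,0,0,0] in 0x00,0x00,0x00. A sketch of that packing, with the order inferred from the byte patterns above:

    # Pack eight 3-bit dpp8 lane selectors into the trailing three encoding
    # bytes, selector 0 in the lowest bits -- matching the checks in this file.
    def dpp8_bytes(sel):
        assert len(sel) == 8 and all(0 <= s <= 7 for s in sel)
        word = 0
        for i, s in enumerate(sel):
            word |= s << (3 * i)  # selector i occupies bits [3*i+2 : 3*i]
        return [word & 0xFF, (word >> 8) & 0xFF, (word >> 16) & 0xFF]

    print([f"0x{b:02x}" for b in dpp8_bytes([7, 6, 5, 4, 3, 2, 1, 0])])  # ['0x77', '0x39', '0x05']
    print([f"0x{b:02x}" for b in dpp8_bytes([0] * 8)])                   # ['0x00', '0x00', '0x00']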
+v_lshl_add_u32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x46,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +v_lshl_add_u32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x46,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +v_lshl_add_u32_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x46,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] + +v_lshl_add_u32_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x46,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] + +v_lshl_add_u32_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x46,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] + +v_lshl_add_u32_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x46,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] + +v_lshl_add_u32_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x46,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] + +v_lshl_add_u32_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x46,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] + +v_lshl_add_u32_e64_dpp v5, v1, v2, null dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x46,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] + +v_lshl_add_u32_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x46,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] + +v_lshl_add_u32_e64_dpp v5, v1, v2, 0.5 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0x46,0xd6,0xea,0x04,0xc2,0x03,0x01,0x77,0x39,0x05] + +v_lshl_add_u32_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x00,0x46,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] + +v_lshl_or_b32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x56,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +v_lshl_or_b32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x56,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +v_lshl_or_b32_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x56,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] + +v_lshl_or_b32_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x56,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] + +v_lshl_or_b32_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x56,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] + +v_lshl_or_b32_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x56,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] + +v_lshl_or_b32_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x56,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] + +v_lshl_or_b32_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x56,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] + +v_lshl_or_b32_e64_dpp v5, v1, v2, null dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x56,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] + +v_lshl_or_b32_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x56,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] + +v_lshl_or_b32_e64_dpp v5, v1, v2, 0.5 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0x56,0xd6,0xea,0x04,0xc2,0x03,0x01,0x77,0x39,0x05] + +v_lshl_or_b32_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x00,0x56,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] + +v_lshlrev_b16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x38,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_lshlrev_b16_e64_dpp v5, v1, v2 
dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0x38,0xd7,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_lshlrev_b16_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x00,0x38,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] + +v_lshrrev_b16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x39,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_lshrrev_b16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0x39,0xd7,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_lshrrev_b16_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x00,0x39,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] + +v_mad_i16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x53,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +v_mad_i16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x53,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +v_mad_i16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x53,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] + +v_mad_i16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x53,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] + +v_mad_i16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x53,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] + +v_mad_i16_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x53,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] + +v_mad_i16_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x53,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] + +v_mad_i16_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x53,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] + +v_mad_i16_e64_dpp v5, v1, v2, null dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x53,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] + +v_mad_i16_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0x53,0xd6,0xea,0x04,0x06,0x03,0x01,0x77,0x39,0x05] + +v_mad_i16_e64_dpp v255, v255, v255, src_scc clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x80,0x53,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] + +v_mad_i32_i16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x5a,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +v_mad_i32_i16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x5a,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +v_mad_i32_i16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x5a,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] + +v_mad_i32_i16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x5a,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] + +v_mad_i32_i16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x5a,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] + +v_mad_i32_i16_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x5a,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] + +v_mad_i32_i16_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x5a,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] + +v_mad_i32_i16_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x5a,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] + +v_mad_i32_i16_e64_dpp v5, v1, v2, null dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x5a,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] + +v_mad_i32_i16_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x5a,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] + 
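The second encoding byte in the v_mad_* cases around here folds several modifiers together. Comparing neighbouring checks suggests bit 7 is clamp (0x00 becomes 0x80 in the clamped v_mad_i16 line above), bits 2:0 are the per-source |...| absolute-value bits (the -|v255|, -|v255| cvt_pk cases show 0x03), and the op_sel variants in the dpp16 section earlier land in bits 6:3 (op_sel:[1,1,1] shows 0x58). A small decode sketch under that reading, with the field split inferred from these tests:

    # Split the second VOP3 encoding byte into the fields observed above:
    # bit 7 = clamp, bits 6:3 = op_sel, bits 2:0 = abs modifiers.
    def decode_byte1(b):
        return {"clamp": bool(b & 0x80), "op_sel": (b >> 3) & 0xF, "abs": b & 0x7}

    print(decode_byte1(0x80))  # clamp only, as in "v_mad_i16 ... src_scc clamp"
    print(decode_byte1(0x03))  # abs on src0 and src1, as in "-|v255|, -|v255|"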
+v_mad_i32_i16_e64_dpp v5, v1, v2, 0.5 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0x5a,0xd6,0xea,0x04,0xc2,0x03,0x01,0x77,0x39,0x05] + +v_mad_i32_i16_e64_dpp v255, v255, v255, src_scc clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x80,0x5a,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] + +v_mad_i32_i24_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x0a,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +v_mad_i32_i24_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x0a,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +v_mad_i32_i24_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x0a,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] + +v_mad_i32_i24_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x0a,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] + +v_mad_i32_i24_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x0a,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] + +v_mad_i32_i24_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x0a,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] + +v_mad_i32_i24_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x0a,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] + +v_mad_i32_i24_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x0a,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] + +v_mad_i32_i24_e64_dpp v5, v1, v2, null dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x0a,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] + +v_mad_i32_i24_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x0a,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] + +v_mad_i32_i24_e64_dpp v5, v1, v2, 0.5 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0x0a,0xd6,0xea,0x04,0xc2,0x03,0x01,0x77,0x39,0x05] + +v_mad_i32_i24_e64_dpp v255, v255, v255, src_scc clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x80,0x0a,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] + +v_mad_u16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x41,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +v_mad_u16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x41,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +v_mad_u16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x41,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] + +v_mad_u16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x41,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] + +v_mad_u16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x41,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] + +v_mad_u16_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x41,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] + +v_mad_u16_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x41,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] + +v_mad_u16_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x41,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] + +v_mad_u16_e64_dpp v5, v1, v2, null dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x41,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] + +v_mad_u16_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0x41,0xd6,0xea,0x04,0x06,0x03,0x01,0x77,0x39,0x05] + +v_mad_u16_e64_dpp v255, v255, v255, src_scc clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x80,0x41,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] + +v_mad_u32_u16_e64_dpp v5, v1, v2, v3 
dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x59,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05]
+
+v_mad_u32_u16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x59,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05]
+
+v_mad_u32_u16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x59,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05]
+
+v_mad_u32_u16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x59,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05]
+
+v_mad_u32_u16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x59,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05]
+
+v_mad_u32_u16_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x59,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05]
+
+v_mad_u32_u16_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x59,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05]
+
+v_mad_u32_u16_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x59,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05]
+
+v_mad_u32_u16_e64_dpp v5, v1, v2, null dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x59,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05]
+
+v_mad_u32_u16_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x59,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05]
+
+v_mad_u32_u16_e64_dpp v5, v1, v2, 0.5 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX11: [0x05,0x00,0x59,0xd6,0xea,0x04,0xc2,0x03,0x01,0x77,0x39,0x05]
+
+v_mad_u32_u16_e64_dpp v255, v255, v255, src_scc clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX11: [0xff,0x80,0x59,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00]
+
+v_mad_u32_u24_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x0b,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05]
+
+v_mad_u32_u24_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x0b,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05]
+
+v_mad_u32_u24_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x0b,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05]
+
+v_mad_u32_u24_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x0b,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05]
+
+v_mad_u32_u24_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x0b,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05]
+
+v_mad_u32_u24_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x0b,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05]
+
+v_mad_u32_u24_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x0b,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05]
+
+v_mad_u32_u24_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x0b,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05]
+
+v_mad_u32_u24_e64_dpp v5, v1, v2, null dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x0b,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05]
+
+v_mad_u32_u24_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x0b,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05]
+
+v_mad_u32_u24_e64_dpp v5, v1, v2, 0.5 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX11: [0x05,0x00,0x0b,0xd6,0xea,0x04,0xc2,0x03,0x01,0x77,0x39,0x05]
+
+v_mad_u32_u24_e64_dpp v255, v255, v255, src_scc clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX11: [0xff,0x80,0x0b,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00]
+
+v_max3_f16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x4c,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05]
+
+v_max3_f16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x4c,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05]
+
+v_max3_f16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x4c,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05]
+
+v_max3_f16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x4c,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05]
+
+v_max3_f16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x4c,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05]
+
+v_max3_f16_e64_dpp v5, |v1|, v2, -ttmp15 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x01,0x4c,0xd6,0xe9,0x04,0xee,0x81,0x01,0x77,0x39,0x05]
+
+v_max3_f16_e64_dpp v5, v1, -|v2|, exec_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x02,0x4c,0xd6,0xe9,0x04,0xfe,0x41,0x01,0x77,0x39,0x05]
+
+v_max3_f16_e64_dpp v5, -v1, v2, |exec_lo| dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x04,0x4c,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05]
+
+v_max3_f16_e64_dpp v5, -|v1|, -|v2|, null dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x03,0x4c,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05]
+
+v_max3_f16_e64_dpp v5, -|v1|, v2, -|-1| dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x05,0x4c,0xd6,0xe9,0x04,0x06,0xa3,0x01,0x77,0x39,0x05]
+
+v_max3_f16_e64_dpp v5, v1, -|v2|, -|0.5| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX11: [0x05,0x06,0x4c,0xd6,0xea,0x04,0xc2,0xc3,0x01,0x77,0x39,0x05]
+
+v_max3_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX11: [0xff,0x87,0x4c,0xd6,0xe9,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00]
+
+v_max3_f32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x1c,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05]
+
+v_max3_f32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x1c,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05]
+
+v_max3_f32_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x1c,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05]
+
+v_max3_f32_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x1c,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05]
+
+v_max3_f32_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x1c,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05]
+
+v_max3_f32_e64_dpp v5, |v1|, v2, -ttmp15 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x01,0x1c,0xd6,0xe9,0x04,0xee,0x81,0x01,0x77,0x39,0x05]
+
+v_max3_f32_e64_dpp v5, v1, -|v2|, exec_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x02,0x1c,0xd6,0xe9,0x04,0xfe,0x41,0x01,0x77,0x39,0x05]
+
+v_max3_f32_e64_dpp v5, -v1, v2, |exec_lo| dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x04,0x1c,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05]
+
+v_max3_f32_e64_dpp v5, -|v1|, -|v2|, null dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x03,0x1c,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05]
+
+v_max3_f32_e64_dpp v5, -|v1|, v2, -|-1| mul:2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x05,0x1c,0xd6,0xe9,0x04,0x06,0xab,0x01,0x77,0x39,0x05]
+
+v_max3_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX11: [0x05,0x06,0x1c,0xd6,0xea,0x04,0xc2,0xd3,0x01,0x77,0x39,0x05]
+
+v_max3_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX11: [0xff,0x87,0x1c,0xd6,0xe9,0xfe,0xf7,0xfb,0xff,0x00,0x00,0x00]
+
+v_max3_i16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x4d,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05]
+
+v_max3_i16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x4d,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05]
+
+v_max3_i16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x4d,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05]
+
+v_max3_i16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x4d,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05]
+
+v_max3_i16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x4d,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05]
+
+v_max3_i16_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x4d,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05]
+
+v_max3_i16_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x4d,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05]
+
+v_max3_i16_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x4d,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05]
+
+v_max3_i16_e64_dpp v5, v1, v2, null dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x4d,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05]
+
+v_max3_i16_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX11: [0x05,0x00,0x4d,0xd6,0xea,0x04,0x06,0x03,0x01,0x77,0x39,0x05]
+
+v_max3_i16_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX11: [0xff,0x00,0x4d,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00]
+
+v_max3_i32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x1d,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05]
+
+v_max3_i32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x1d,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05]
+
+v_max3_i32_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x1d,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05]
+
+v_max3_i32_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x1d,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05]
+
+v_max3_i32_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x1d,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05]
+
+v_max3_i32_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x1d,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05]
+
+v_max3_i32_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x1d,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05]
+
+v_max3_i32_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x1d,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05]
+
+v_max3_i32_e64_dpp v5, v1, v2, null dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x1d,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05]
+
+v_max3_i32_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x1d,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05]
+
+v_max3_i32_e64_dpp v5, v1, v2, 0.5 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX11: [0x05,0x00,0x1d,0xd6,0xea,0x04,0xc2,0x03,0x01,0x77,0x39,0x05]
+
+v_max3_i32_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX11: [0xff,0x00,0x1d,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00]
+
+v_max3_u16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x4e,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05]
+
+v_max3_u16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x4e,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05]
+
+v_max3_u16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x4e,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05]
+
+v_max3_u16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x4e,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05]
+
+v_max3_u16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x4e,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05]
+
+v_max3_u16_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x4e,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05]
+
+v_max3_u16_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x4e,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05]
+
+v_max3_u16_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x4e,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05]
+
+v_max3_u16_e64_dpp v5, v1, v2, null dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x4e,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05]
+
+v_max3_u16_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX11: [0x05,0x00,0x4e,0xd6,0xea,0x04,0x06,0x03,0x01,0x77,0x39,0x05]
+
+v_max3_u16_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX11: [0xff,0x00,0x4e,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00]
+
+v_max3_u32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x1e,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05]
+
+v_max3_u32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x1e,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05]
+
+v_max3_u32_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x1e,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05]
+
+v_max3_u32_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x1e,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05]
+
+v_max3_u32_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x1e,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05]
+
+v_max3_u32_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x1e,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05]
+
+v_max3_u32_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x1e,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05]
+
+v_max3_u32_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x1e,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05]
+
+v_max3_u32_e64_dpp v5, v1, v2, null dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x1e,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05]
+
+v_max3_u32_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x1e,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05]
+
+v_max3_u32_e64_dpp v5, v1, v2, 0.5 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX11: [0x05,0x00,0x1e,0xd6,0xea,0x04,0xc2,0x03,0x01,0x77,0x39,0x05]
+
+v_max3_u32_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX11: [0xff,0x00,0x1e,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00]
+
+v_max_i16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x0a,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_max_i16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX11: [0x05,0x00,0x0a,0xd7,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_max_i16_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX11: [0xff,0x00,0x0a,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+
+v_max_u16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x09,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_max_u16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX11: [0x05,0x00,0x09,0xd7,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_max_u16_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX11: [0xff,0x00,0x09,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+
+v_maxmin_f16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x60,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05]
+
+v_maxmin_f16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x60,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05]
+
+v_maxmin_f16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x60,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05]
+
+v_maxmin_f16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x60,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05]
+
+v_maxmin_f16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x60,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05]
+
+v_maxmin_f16_e64_dpp v5, |v1|, v2, -ttmp15 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x01,0x60,0xd6,0xe9,0x04,0xee,0x81,0x01,0x77,0x39,0x05]
+
+v_maxmin_f16_e64_dpp v5, v1, -|v2|, exec_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x02,0x60,0xd6,0xe9,0x04,0xfe,0x41,0x01,0x77,0x39,0x05]
+
+v_maxmin_f16_e64_dpp v5, -v1, v2, |exec_lo| dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x04,0x60,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05]
+
+v_maxmin_f16_e64_dpp v5, -|v1|, -|v2|, null dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x03,0x60,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05]
+
+v_maxmin_f16_e64_dpp v5, -|v1|, v2, -|-1| mul:2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x05,0x60,0xd6,0xe9,0x04,0x06,0xab,0x01,0x77,0x39,0x05]
+
+v_maxmin_f16_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX11: [0x05,0x06,0x60,0xd6,0xea,0x04,0xc2,0xd3,0x01,0x77,0x39,0x05]
+
+v_maxmin_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX11: [0xff,0x87,0x60,0xd6,0xe9,0xfe,0xf7,0xfb,0xff,0x00,0x00,0x00]
+
+v_maxmin_f32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x5e,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05]
+
+v_maxmin_f32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x5e,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05]
+
+v_maxmin_f32_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x5e,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05]
+
+v_maxmin_f32_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x5e,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05]
+
+v_maxmin_f32_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x5e,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05]
+
+v_maxmin_f32_e64_dpp v5, |v1|, v2, -ttmp15 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x01,0x5e,0xd6,0xe9,0x04,0xee,0x81,0x01,0x77,0x39,0x05]
+
+v_maxmin_f32_e64_dpp v5, v1, -|v2|, exec_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x02,0x5e,0xd6,0xe9,0x04,0xfe,0x41,0x01,0x77,0x39,0x05]
+
+v_maxmin_f32_e64_dpp v5, -v1, v2, |exec_lo| dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x04,0x5e,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05]
+
+v_maxmin_f32_e64_dpp v5, -|v1|, -|v2|, null dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x03,0x5e,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05]
+
+v_maxmin_f32_e64_dpp v5, -|v1|, v2, -|-1| mul:2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x05,0x5e,0xd6,0xe9,0x04,0x06,0xab,0x01,0x77,0x39,0x05]
+
+v_maxmin_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX11: [0x05,0x06,0x5e,0xd6,0xea,0x04,0xc2,0xd3,0x01,0x77,0x39,0x05]
+
+v_maxmin_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX11: [0xff,0x87,0x5e,0xd6,0xe9,0xfe,0xf7,0xfb,0xff,0x00,0x00,0x00]
+
+v_maxmin_i32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x64,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05]
+
+v_maxmin_i32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x64,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05]
+
+v_maxmin_i32_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x64,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05]
+
+v_maxmin_i32_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x64,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05]
+
+v_maxmin_i32_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x64,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05]
+
+v_maxmin_i32_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x64,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05]
+
+v_maxmin_i32_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x64,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05]
+
+v_maxmin_i32_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x64,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05]
+
+v_maxmin_i32_e64_dpp v5, v1, v2, null dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x64,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05]
+
+v_maxmin_i32_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x64,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05]
+
+v_maxmin_i32_e64_dpp v5, v1, v2, 0.5 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX11: [0x05,0x00,0x64,0xd6,0xea,0x04,0xc2,0x03,0x01,0x77,0x39,0x05]
+
+v_maxmin_i32_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX11: [0xff,0x00,0x64,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00]
+
+v_maxmin_u32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x62,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05]
+
+v_maxmin_u32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x62,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05]
+
+v_maxmin_u32_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x62,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05]
+
+v_maxmin_u32_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x62,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05]
+
+v_maxmin_u32_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x62,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05]
+
+v_maxmin_u32_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x62,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05]
+
+v_maxmin_u32_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x62,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05]
+
+v_maxmin_u32_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x62,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05]
+
+v_maxmin_u32_e64_dpp v5, v1, v2, null dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x62,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05]
+
+v_maxmin_u32_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x62,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05]
+
+v_maxmin_u32_e64_dpp v5, v1, v2, 0.5 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX11: [0x05,0x00,0x62,0xd6,0xea,0x04,0xc2,0x03,0x01,0x77,0x39,0x05]
+
+v_maxmin_u32_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX11: [0xff,0x00,0x62,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00]
+
+v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x20,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX11: [0x05,0x00,0x20,0xd7,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_mbcnt_hi_u32_b32_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX11: [0xff,0x00,0x20,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+
+v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x1f,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX11: [0x05,0x00,0x1f,0xd7,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_mbcnt_lo_u32_b32_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX11: [0xff,0x00,0x1f,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+
+v_med3_f16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x4f,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05]
+
+v_med3_f16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x4f,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05]
+
+v_med3_f16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x4f,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05]
+
+v_med3_f16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x4f,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05]
+
+v_med3_f16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x4f,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05]
+
+v_med3_f16_e64_dpp v5, |v1|, v2, -ttmp15 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x01,0x4f,0xd6,0xe9,0x04,0xee,0x81,0x01,0x77,0x39,0x05]
+
+v_med3_f16_e64_dpp v5, v1, -|v2|, exec_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x02,0x4f,0xd6,0xe9,0x04,0xfe,0x41,0x01,0x77,0x39,0x05]
+
+v_med3_f16_e64_dpp v5, -v1, v2, |exec_lo| dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x04,0x4f,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05]
+
+v_med3_f16_e64_dpp v5, -|v1|, -|v2|, null dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x03,0x4f,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05]
+
+v_med3_f16_e64_dpp v5, -|v1|, v2, -|-1| dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x05,0x4f,0xd6,0xe9,0x04,0x06,0xa3,0x01,0x77,0x39,0x05]
+
+v_med3_f16_e64_dpp v5, v1, -|v2|, -|0.5| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX11: [0x05,0x06,0x4f,0xd6,0xea,0x04,0xc2,0xc3,0x01,0x77,0x39,0x05]
+
+v_med3_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX11: [0xff,0x87,0x4f,0xd6,0xe9,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00]
+
+v_med3_f32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x1f,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05]
+
+v_med3_f32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x1f,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05]
+
+v_med3_f32_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x1f,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05]
+
+v_med3_f32_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x1f,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05]
+
+v_med3_f32_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x1f,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05]
+
+v_med3_f32_e64_dpp v5, |v1|, v2, -ttmp15 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x01,0x1f,0xd6,0xe9,0x04,0xee,0x81,0x01,0x77,0x39,0x05]
+
+v_med3_f32_e64_dpp v5, v1, -|v2|, exec_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x02,0x1f,0xd6,0xe9,0x04,0xfe,0x41,0x01,0x77,0x39,0x05]
+
+v_med3_f32_e64_dpp v5, -v1, v2, |exec_lo| dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x04,0x1f,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05]
+
+v_med3_f32_e64_dpp v5, -|v1|, -|v2|, null dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x03,0x1f,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05]
+
+v_med3_f32_e64_dpp v5, -|v1|, v2, -|-1| mul:2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x05,0x1f,0xd6,0xe9,0x04,0x06,0xab,0x01,0x77,0x39,0x05]
+
+v_med3_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX11: [0x05,0x06,0x1f,0xd6,0xea,0x04,0xc2,0xd3,0x01,0x77,0x39,0x05]
+
+v_med3_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX11: [0xff,0x87,0x1f,0xd6,0xe9,0xfe,0xf7,0xfb,0xff,0x00,0x00,0x00]
+
+v_med3_i16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x50,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05]
+
+v_med3_i16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x50,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05]
+
+v_med3_i16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x50,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05]
+
+v_med3_i16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x50,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05]
+
+v_med3_i16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x50,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05]
+
+v_med3_i16_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x50,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05]
+
+v_med3_i16_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x50,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05]
+
+v_med3_i16_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x50,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05]
+
+v_med3_i16_e64_dpp v5, v1, v2, null dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x50,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05]
+
+v_med3_i16_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX11: [0x05,0x00,0x50,0xd6,0xea,0x04,0x06,0x03,0x01,0x77,0x39,0x05]
+
+v_med3_i16_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX11: [0xff,0x00,0x50,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00]
+
+v_med3_i32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x20,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05]
+
+v_med3_i32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x20,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05]
+
+v_med3_i32_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x20,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05]
+
+v_med3_i32_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x20,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05]
+
+v_med3_i32_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x20,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05]
+
+v_med3_i32_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x20,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05]
+
+v_med3_i32_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x20,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05]
+
+v_med3_i32_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x20,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05]
+
+v_med3_i32_e64_dpp v5, v1, v2, null dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x20,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05]
+
+v_med3_i32_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x20,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05]
+
+v_med3_i32_e64_dpp v5, v1, v2, 0.5 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX11: [0x05,0x00,0x20,0xd6,0xea,0x04,0xc2,0x03,0x01,0x77,0x39,0x05]
+
+v_med3_i32_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX11: [0xff,0x00,0x20,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00]
+
+v_med3_u16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x51,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05]
+
+v_med3_u16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x51,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05]
+
+v_med3_u16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x51,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05]
+
+v_med3_u16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x51,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05]
+
+v_med3_u16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x51,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05]
+
+v_med3_u16_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x51,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05]
+
+v_med3_u16_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x51,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05]
+
+v_med3_u16_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x51,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05]
+
+v_med3_u16_e64_dpp v5, v1, v2, null dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x51,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05]
+
+v_med3_u16_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX11: [0x05,0x00,0x51,0xd6,0xea,0x04,0x06,0x03,0x01,0x77,0x39,0x05]
+
+v_med3_u16_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX11: [0xff,0x00,0x51,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00]
+
+v_med3_u32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x21,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05]
+
+v_med3_u32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x21,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05]
+
+v_med3_u32_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x21,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05]
+
+v_med3_u32_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x21,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05]
+
+v_med3_u32_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x21,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05]
+
+v_med3_u32_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x21,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05]
+
+v_med3_u32_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x21,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05]
+
+v_med3_u32_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x21,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05]
+
+v_med3_u32_e64_dpp v5, v1, v2, null dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x21,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05]
+
+v_med3_u32_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x21,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05]
+
+v_med3_u32_e64_dpp v5, v1, v2, 0.5 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX11: [0x05,0x00,0x21,0xd6,0xea,0x04,0xc2,0x03,0x01,0x77,0x39,0x05]
+
+v_med3_u32_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX11: [0xff,0x00,0x21,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00]
+
+v_min3_f16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x49,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05]
+
+v_min3_f16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x49,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05]
+
+v_min3_f16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x49,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05]
+
+v_min3_f16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x49,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05]
+
+v_min3_f16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x49,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05]
+
+v_min3_f16_e64_dpp v5, |v1|, v2, -ttmp15 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x01,0x49,0xd6,0xe9,0x04,0xee,0x81,0x01,0x77,0x39,0x05]
+
+v_min3_f16_e64_dpp v5, v1, -|v2|, exec_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x02,0x49,0xd6,0xe9,0x04,0xfe,0x41,0x01,0x77,0x39,0x05]
+
+v_min3_f16_e64_dpp v5, -v1, v2, |exec_lo| dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x04,0x49,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05]
+
+v_min3_f16_e64_dpp v5, -|v1|, -|v2|, null dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x03,0x49,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05]
+
+v_min3_f16_e64_dpp v5, -|v1|, v2, -|-1| dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x05,0x49,0xd6,0xe9,0x04,0x06,0xa3,0x01,0x77,0x39,0x05]
+
+v_min3_f16_e64_dpp v5, v1, -|v2|, -|0.5| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX11: [0x05,0x06,0x49,0xd6,0xea,0x04,0xc2,0xc3,0x01,0x77,0x39,0x05]
+
+v_min3_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX11: [0xff,0x87,0x49,0xd6,0xe9,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00]
+
+v_min3_f32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x19,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05]
+
+v_min3_f32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x19,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05]
+
+v_min3_f32_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x19,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05]
+
+v_min3_f32_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x19,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05]
+
+v_min3_f32_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x19,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05]
+
+v_min3_f32_e64_dpp v5, |v1|, v2, -ttmp15 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x01,0x19,0xd6,0xe9,0x04,0xee,0x81,0x01,0x77,0x39,0x05]
+
+v_min3_f32_e64_dpp v5, v1, -|v2|, exec_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x02,0x19,0xd6,0xe9,0x04,0xfe,0x41,0x01,0x77,0x39,0x05]
+
+v_min3_f32_e64_dpp v5, -v1, v2, |exec_lo| dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x04,0x19,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05]
+
+v_min3_f32_e64_dpp v5, -|v1|, -|v2|, null dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x03,0x19,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05]
+
+v_min3_f32_e64_dpp v5, -|v1|, v2, -|-1| mul:2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x05,0x19,0xd6,0xe9,0x04,0x06,0xab,0x01,0x77,0x39,0x05]
+
+v_min3_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX11: [0x05,0x06,0x19,0xd6,0xea,0x04,0xc2,0xd3,0x01,0x77,0x39,0x05]
+
+v_min3_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX11: [0xff,0x87,0x19,0xd6,0xe9,0xfe,0xf7,0xfb,0xff,0x00,0x00,0x00]
+
+v_min3_i16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x4a,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05]
+
+v_min3_i16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x4a,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05]
+
+v_min3_i16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x4a,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05]
+
+v_min3_i16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x4a,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05]
+
+v_min3_i16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x4a,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05]
+
+v_min3_i16_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x4a,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05]
+
+v_min3_i16_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x4a,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05]
+
+v_min3_i16_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x4a,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05]
+
+v_min3_i16_e64_dpp v5, v1, v2, null dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x4a,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05]
+
+v_min3_i16_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX11: [0x05,0x00,0x4a,0xd6,0xea,0x04,0x06,0x03,0x01,0x77,0x39,0x05]
+
+v_min3_i16_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX11: [0xff,0x00,0x4a,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00]
+
+v_min3_i32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x1a,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05]
+
+v_min3_i32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x1a,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05]
+
+v_min3_i32_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x1a,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05]
+
+v_min3_i32_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x1a,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05]
+
+v_min3_i32_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x1a,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05]
+
+v_min3_i32_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x1a,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05]
+
+v_min3_i32_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x1a,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05]
+
+v_min3_i32_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x1a,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05]
+
+v_min3_i32_e64_dpp v5, v1, v2, null dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x1a,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05]
+
+v_min3_i32_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x1a,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05]
+
+v_min3_i32_e64_dpp v5, v1, v2, 0.5 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX11: [0x05,0x00,0x1a,0xd6,0xea,0x04,0xc2,0x03,0x01,0x77,0x39,0x05]
+
+v_min3_i32_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX11: [0xff,0x00,0x1a,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00]
+
+v_min3_u16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x4b,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05]
+
+v_min3_u16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x4b,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05]
+
+v_min3_u16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x4b,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05]
+
+v_min3_u16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x4b,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05]
+
+v_min3_u16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x4b,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05]
+
+v_min3_u16_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x4b,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05]
+
+v_min3_u16_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x4b,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05]
+
+v_min3_u16_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x4b,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05]
+
+v_min3_u16_e64_dpp v5, v1, v2, null dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x4b,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05]
+
+v_min3_u16_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX11: [0x05,0x00,0x4b,0xd6,0xea,0x04,0x06,0x03,0x01,0x77,0x39,0x05]
+
+v_min3_u16_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX11: [0xff,0x00,0x4b,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00]
+
+v_min3_u32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x1b,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05]
+
+v_min3_u32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x1b,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05]
+
+v_min3_u32_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x1b,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05]
+
+v_min3_u32_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x1b,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05]
+
+v_min3_u32_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x1b,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05]
+
+v_min3_u32_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x1b,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05]
+
+v_min3_u32_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x1b,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05]
+
+v_min3_u32_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x1b,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05]
+
+v_min3_u32_e64_dpp v5, v1, v2, null dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x1b,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05]
+
+v_min3_u32_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x1b,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05]
+
+v_min3_u32_e64_dpp v5, v1, v2, 0.5 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX11: [0x05,0x00,0x1b,0xd6,0xea,0x04,0xc2,0x03,0x01,0x77,0x39,0x05]
+
+v_min3_u32_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX11: [0xff,0x00,0x1b,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00]
+
+v_min_i16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x0c,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_min_i16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX11: [0x05,0x00,0x0c,0xd7,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_min_i16_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX11: [0xff,0x00,0x0c,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+
+v_min_u16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x0b,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_min_u16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX11: [0x05,0x00,0x0b,0xd7,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_min_u16_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX11: [0xff,0x00,0x0b,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+
+v_minmax_f16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x61,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05]
+
+v_minmax_f16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x61,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05]
+
+v_minmax_f16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x61,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05]
+
+v_minmax_f16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x61,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05]
+
+v_minmax_f16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x61,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05]
+
+v_minmax_f16_e64_dpp v5, |v1|, v2, -ttmp15 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x01,0x61,0xd6,0xe9,0x04,0xee,0x81,0x01,0x77,0x39,0x05]
+
+v_minmax_f16_e64_dpp v5, v1, -|v2|, exec_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x02,0x61,0xd6,0xe9,0x04,0xfe,0x41,0x01,0x77,0x39,0x05]
+
+v_minmax_f16_e64_dpp v5, -v1, v2, |exec_lo| dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x04,0x61,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05]
+
+v_minmax_f16_e64_dpp v5, -|v1|, -|v2|, null dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x03,0x61,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05]
+
+v_minmax_f16_e64_dpp v5, -|v1|, v2, -|-1| mul:2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x05,0x61,0xd6,0xe9,0x04,0x06,0xab,0x01,0x77,0x39,0x05]
+
+v_minmax_f16_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX11: [0x05,0x06,0x61,0xd6,0xea,0x04,0xc2,0xd3,0x01,0x77,0x39,0x05]
+
+v_minmax_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX11: [0xff,0x87,0x61,0xd6,0xe9,0xfe,0xf7,0xfb,0xff,0x00,0x00,0x00]
+
+v_minmax_f32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x5f,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05]
+
+v_minmax_f32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x5f,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05]
+
+v_minmax_f32_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x5f,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05]
+
+v_minmax_f32_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x5f,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05]
+
+v_minmax_f32_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x5f,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05]
+
+v_minmax_f32_e64_dpp v5, |v1|, v2, -ttmp15 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x01,0x5f,0xd6,0xe9,0x04,0xee,0x81,0x01,0x77,0x39,0x05]
+
+v_minmax_f32_e64_dpp v5, v1, -|v2|, exec_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x02,0x5f,0xd6,0xe9,0x04,0xfe,0x41,0x01,0x77,0x39,0x05]
+
+v_minmax_f32_e64_dpp v5, -v1, v2, |exec_lo| dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x04,0x5f,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05]
+
+v_minmax_f32_e64_dpp v5, -|v1|, -|v2|, null dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x03,0x5f,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05]
+
+v_minmax_f32_e64_dpp v5, -|v1|, v2, -|-1| mul:2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x05,0x5f,0xd6,0xe9,0x04,0x06,0xab,0x01,0x77,0x39,0x05]
+
+v_minmax_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX11: [0x05,0x06,0x5f,0xd6,0xea,0x04,0xc2,0xd3,0x01,0x77,0x39,0x05]
+
+v_minmax_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX11: [0xff,0x87,0x5f,0xd6,0xe9,0xfe,0xf7,0xfb,0xff,0x00,0x00,0x00]
+
+v_minmax_i32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x65,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05]
+
+v_minmax_i32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x65,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05]
+
+v_minmax_i32_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x65,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05]
+
+v_minmax_i32_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x65,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05]
+
+v_minmax_i32_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x65,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05]
+
+v_minmax_i32_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x65,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05]
+
+v_minmax_i32_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x65,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05]
+
+v_minmax_i32_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x65,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05]
+
+v_minmax_i32_e64_dpp v5, v1, v2, null dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x65,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05]
+
+v_minmax_i32_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x65,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05]
+
+v_minmax_i32_e64_dpp v5, v1, v2, 0.5 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX11: [0x05,0x00,0x65,0xd6,0xea,0x04,0xc2,0x03,0x01,0x77,0x39,0x05]
+
+v_minmax_i32_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX11: [0xff,0x00,0x65,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00]
+
+v_minmax_u32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x63,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05]
+
+v_minmax_u32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x63,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05]
+
+v_minmax_u32_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x63,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05]
+
+v_minmax_u32_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x63,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05]
+
+v_minmax_u32_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x63,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05]
+
+v_minmax_u32_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x63,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05]
+
+v_minmax_u32_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x63,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05]
+
+v_minmax_u32_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x63,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05]
+
+v_minmax_u32_e64_dpp v5, v1, v2, null dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x63,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05]
+
+v_minmax_u32_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x63,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05]
+
+v_minmax_u32_e64_dpp v5, v1, v2, 0.5 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX11: [0x05,0x00,0x63,0xd6,0xea,0x04,0xc2,0x03,0x01,0x77,0x39,0x05]
+
+v_minmax_u32_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX11: [0xff,0x00,0x63,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00]
+
+v_msad_u8_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x39,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05]
+
+v_msad_u8_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x39,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05]
+
+v_msad_u8_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x39,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05]
+
+v_msad_u8_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x39,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05]
+
+v_msad_u8_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x39,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05]
+
+v_msad_u8_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x39,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05]
+
+v_msad_u8_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x39,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05]
+
+v_msad_u8_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x39,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05]
+
+v_msad_u8_e64_dpp v5, v1, v2, null dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x39,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05]
+
+v_msad_u8_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x39,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05]
+
+v_msad_u8_e64_dpp v5, v1, v2, 0.5 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX11: [0x05,0x00,0x39,0xd6,0xea,0x04,0xc2,0x03,0x01,0x77,0x39,0x05]
+
+v_msad_u8_e64_dpp v255, v255, v255, src_scc clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX11: [0xff,0x80,0x39,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00]
+
+v_mul_lo_u16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x05,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_mul_lo_u16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX11: [0x05,0x00,0x05,0xd7,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_mul_lo_u16_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX11: [0xff,0x00,0x05,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+
+v_mullit_f32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x18,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05]
+
+v_mullit_f32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x18,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05]
+
+v_mullit_f32_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x18,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05]
+
+v_mullit_f32_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x18,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05]
+
+v_mullit_f32_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x18,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05]
+
+v_mullit_f32_e64_dpp v5, |v1|, v2, -ttmp15 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x01,0x18,0xd6,0xe9,0x04,0xee,0x81,0x01,0x77,0x39,0x05]
+
+v_mullit_f32_e64_dpp v5, v1, -|v2|, exec_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x02,0x18,0xd6,0xe9,0x04,0xfe,0x41,0x01,0x77,0x39,0x05]
+
+v_mullit_f32_e64_dpp v5, -v1, v2, |exec_lo| dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x04,0x18,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05]
+
+v_mullit_f32_e64_dpp v5, -|v1|, -|v2|, null dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x03,0x18,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05]
+
+v_mullit_f32_e64_dpp v5, -|v1|, v2, -|-1| mul:2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x05,0x18,0xd6,0xe9,0x04,0x06,0xab,0x01,0x77,0x39,0x05]
+
+v_mullit_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX11: [0x05,0x06,0x18,0xd6,0xea,0x04,0xc2,0xd3,0x01,0x77,0x39,0x05]
+
+v_mullit_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX11: [0xff,0x87,0x18,0xd6,0xe9,0xfe,0xf7,0xfb,0xff,0x00,0x00,0x00]
+
+v_or3_b32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x58,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05]
+
+v_or3_b32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x58,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05]
+
+v_or3_b32_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x58,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05]
+
+v_or3_b32_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x58,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05]
+
+v_or3_b32_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x58,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05]
+
+v_or3_b32_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x58,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05]
+
+v_or3_b32_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x58,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05]
+
+v_or3_b32_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x58,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05]
+
+v_or3_b32_e64_dpp v5, v1, v2, null dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x58,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05]
+
+v_or3_b32_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x58,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05]
+
+v_or3_b32_e64_dpp v5, v1, v2, 0.5 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX11: [0x05,0x00,0x58,0xd6,0xea,0x04,0xc2,0x03,0x01,0x77,0x39,0x05]
+
+v_or3_b32_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX11: [0xff,0x00,0x58,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00]
+
+v_or_b16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x63,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_or_b16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX11: [0x05,0x00,0x63,0xd7,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_or_b16_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX11: [0xff,0x00,0x63,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+
+v_pack_b32_f16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x11,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_pack_b32_f16_e64_dpp v5, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x01,0x11,0xd7,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+
+v_pack_b32_f16_e64_dpp v5, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX11: [0x05,0x02,0x11,0xd7,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+
+v_pack_b32_f16_e64_dpp v255, -|v255|, -|v255| dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX11: [0xff,0x03,0x11,0xd7,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+
+v_perm_b32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x44,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05]
+
+v_perm_b32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x44,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05]
+
+v_perm_b32_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x44,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05]
+
+v_perm_b32_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x44,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05]
+
+v_perm_b32_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x44,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05]
+
+v_perm_b32_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x44,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05]
+
+v_perm_b32_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x44,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05]
+
+v_perm_b32_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x44,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05]
+
+v_perm_b32_e64_dpp v5, v1, v2, null dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x44,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05]
+
+v_perm_b32_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x44,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05]
+
+v_perm_b32_e64_dpp v5, v1, v2, 0.5 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX11: [0x05,0x00,0x44,0xd6,0xea,0x04,0xc2,0x03,0x01,0x77,0x39,0x05]
+
+v_perm_b32_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX11: [0xff,0x00,0x44,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00]
+
+v_sad_hi_u8_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x23,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05]
+
+v_sad_hi_u8_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x23,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05]
+
+v_sad_hi_u8_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x23,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05]
+
+v_sad_hi_u8_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x23,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05]
+
+v_sad_hi_u8_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x23,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05]
+
+v_sad_hi_u8_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x23,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05]
+
+v_sad_hi_u8_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x23,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05]
+
+v_sad_hi_u8_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x23,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05]
+
+v_sad_hi_u8_e64_dpp v5, v1, v2, null dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x23,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05]
+
+v_sad_hi_u8_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x23,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05]
+
+v_sad_hi_u8_e64_dpp v5, v1, v2, 0.5 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX11: [0x05,0x00,0x23,0xd6,0xea,0x04,0xc2,0x03,0x01,0x77,0x39,0x05]
+
+v_sad_hi_u8_e64_dpp v255, v255, v255, src_scc clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX11: [0xff,0x80,0x23,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00]
+
+v_sad_u16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x24,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05]
+
+v_sad_u16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x24,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05]
+
+v_sad_u16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x24,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05]
+
+v_sad_u16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x24,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05]
+
+v_sad_u16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x24,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05]
+
+v_sad_u16_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x24,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05]
+
+v_sad_u16_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x24,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05]
+
+v_sad_u16_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x24,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05]
+
+v_sad_u16_e64_dpp v5, v1, v2, null dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x24,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05]
+
+v_sad_u16_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x24,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05]
+
+v_sad_u16_e64_dpp v5, v1, v2, 0.5 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX11: [0x05,0x00,0x24,0xd6,0xea,0x04,0xc2,0x03,0x01,0x77,0x39,0x05]
+
+v_sad_u16_e64_dpp v255, v255, v255, src_scc clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX11: [0xff,0x80,0x24,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00]
+
+v_sad_u32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x25,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05]
+
+v_sad_u32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x25,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05]
+
+v_sad_u32_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x25,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05]
+
+v_sad_u32_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x25,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05]
+
+v_sad_u32_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x25,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05]
+
+v_sad_u32_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x25,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05]
+
+v_sad_u32_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x25,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05]
+
+v_sad_u32_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x25,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05]
+
+v_sad_u32_e64_dpp v5, v1, v2, null dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x25,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05]
+
+v_sad_u32_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x25,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05]
+
+v_sad_u32_e64_dpp v5, v1, v2, 0.5 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX11: [0x05,0x00,0x25,0xd6,0xea,0x04,0xc2,0x03,0x01,0x77,0x39,0x05]
+
+v_sad_u32_e64_dpp v255, v255, v255, src_scc clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX11: [0xff,0x80,0x25,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00]
+
+v_sad_u8_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x22,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05]
+
+v_sad_u8_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x22,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05]
+
+v_sad_u8_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x22,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05]
+
+v_sad_u8_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x22,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05]
+
+v_sad_u8_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x22,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05]
+
+v_sad_u8_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x22,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05]
+
+v_sad_u8_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x22,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05]
+
+v_sad_u8_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x22,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05]
+
+v_sad_u8_e64_dpp v5, v1, v2, null dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x22,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05]
+
+v_sad_u8_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x22,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05]
+
+v_sad_u8_e64_dpp v5, v1, v2, 0.5 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX11: [0x05,0x00,0x22,0xd6,0xea,0x04,0xc2,0x03,0x01,0x77,0x39,0x05]
+
+v_sad_u8_e64_dpp v255, v255, v255, src_scc clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX11: [0xff,0x80,0x22,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00]
+
+v_sub_co_u32_e64_dpp v5, s6, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: [0x05,0x06,0x01,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_sub_co_u32_e64_dpp v5, s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: [0x05,0x69,0x01,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_sub_co_u32_e64_dpp v5, vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: [0x05,0x6a,0x01,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_sub_co_u32_e64_dpp v5, vcc_hi, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: [0x05,0x6b,0x01,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_sub_co_u32_e64_dpp v5, ttmp15, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: [0x05,0x7b,0x01,0xd7,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_sub_co_u32_e64_dpp v5, s[12:13], v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: [0x05,0x0c,0x01,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_sub_co_u32_e64_dpp v5, s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: [0x05,0x68,0x01,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_sub_co_u32_e64_dpp v5, vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: [0x05,0x6a,0x01,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_sub_co_u32_e64_dpp v5, ttmp[14:15], v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: [0x05,0x7a,0x01,0xd7,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_sub_co_u32_e64_dpp v255, null, v255, v255 clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX11: [0xff,0xfc,0x01,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+
+v_sub_nc_i16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_sub_nc_i16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX11: [0x05,0x00,0x0e,0xd7,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_sub_nc_i16_e64_dpp v255, v255, v255 clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX11: [0xff,0x80,0x0e,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+
+v_sub_nc_i32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x25,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_sub_nc_i32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX11: [0x05,0x00,0x25,0xd7,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_sub_nc_i32_e64_dpp v255, v255, v255 clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX11: [0xff,0x80,0x25,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+
+v_sub_nc_u16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_sub_nc_u16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX11: [0x05,0x00,0x04,0xd7,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_sub_nc_u16_e64_dpp v255, v255, v255 clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX11: [0xff,0x80,0x04,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+
+v_subrev_co_u32_e64_dpp v5, s6, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: [0x05,0x06,0x02,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_subrev_co_u32_e64_dpp v5, s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: [0x05,0x69,0x02,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_subrev_co_u32_e64_dpp v5, vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: [0x05,0x6a,0x02,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_subrev_co_u32_e64_dpp v5, vcc_hi, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: [0x05,0x6b,0x02,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_subrev_co_u32_e64_dpp v5, ttmp15, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: [0x05,0x7b,0x02,0xd7,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_subrev_co_u32_e64_dpp v5, s[12:13], v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: [0x05,0x0c,0x02,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_subrev_co_u32_e64_dpp v5, s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: [0x05,0x68,0x02,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_subrev_co_u32_e64_dpp v5, vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: [0x05,0x6a,0x02,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_subrev_co_u32_e64_dpp v5, ttmp[14:15], v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: [0x05,0x7a,0x02,0xd7,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_subrev_co_u32_e64_dpp v255, null, v255, v255 clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX11: [0xff,0xfc,0x02,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+
+v_xad_u32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x45,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05]
+
+v_xad_u32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x45,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05]
+
+v_xad_u32_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x45,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05]
+
+v_xad_u32_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x45,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05]
+
+v_xad_u32_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x45,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05]
+
+v_xad_u32_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x45,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05]
+
+v_xad_u32_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x45,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05]
+
+v_xad_u32_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x45,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05]
+
+v_xad_u32_e64_dpp v5, v1, v2, null dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x45,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05]
+
+v_xad_u32_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x45,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05]
+
+v_xad_u32_e64_dpp v5, v1, v2, 0.5 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX11: [0x05,0x00,0x45,0xd6,0xea,0x04,0xc2,0x03,0x01,0x77,0x39,0x05]
+
+v_xad_u32_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX11: [0xff,0x00,0x45,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00]
+
+v_xor3_b32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x40,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05]
+
+v_xor3_b32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x40,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05]
+
+v_xor3_b32_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x40,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05]
+
+v_xor3_b32_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x40,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05]
+
+v_xor3_b32_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x40,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05]
+
+v_xor3_b32_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x40,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05]
+
+v_xor3_b32_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x40,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05]
+
+v_xor3_b32_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x40,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05]
+
+v_xor3_b32_e64_dpp v5, v1, v2, null dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x40,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05]
+
+v_xor3_b32_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x40,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05]
+
+v_xor3_b32_e64_dpp v5, v1, v2, 0.5 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX11: [0x05,0x00,0x40,0xd6,0xea,0x04,0xc2,0x03,0x01,0x77,0x39,0x05]
+
+v_xor3_b32_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX11: [0xff,0x00,0x40,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00]
+
+v_xor_b16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x00,0x64,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_xor_b16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX11: [0x05,0x00,0x64,0xd7,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_xor_b16_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX11: [0xff,0x00,0x64,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+
+v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x58,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x08,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x10,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_add_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1
+// GFX11: [0xff,0xc0,0x0d,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+
+v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x58,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x08,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x10,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_add_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1
+// GFX11: [0xff,0xc0,0x03,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+
+v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x0a,0x12,0xd7,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+
+v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] dpp8:[0,0,0,0,0,0,0,0] fi:1
+// GFX11: [0xff,0x13,0x12,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+
+v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x0a,0x13,0xd7,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+
+v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] dpp8:[0,0,0,0,0,0,0,0] fi:1
+// GFX11: [0xff,0x13,0x13,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+
+v_div_fixup_f16_e64_dpp v5, -v1, v2, |exec_lo| op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x05,0x7c,0x54,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05]
+
+v_div_fixup_f16_e64_dpp v5, -|v1|, -|v2|, null
op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x0b,0x54,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05] + +v_div_fixup_f16_e64_dpp v5, -|v1|, v2, -|-1| op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x15,0x54,0xd6,0xe9,0x04,0x06,0xa3,0x01,0x77,0x39,0x05] + +v_div_fixup_f16_e64_dpp v5, v1, -|v2|, -|0.5| op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x26,0x54,0xd6,0xe9,0x04,0xc2,0xc3,0x01,0x77,0x39,0x05] + +v_div_fixup_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 +// GFX11: [0xff,0xc7,0x54,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00] + +v_fma_f16_e64_dpp v5, -v1, v2, |exec_lo| op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x7c,0x48,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05] + +v_fma_f16_e64_dpp v5, -|v1|, -|v2|, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x0b,0x48,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05] + +v_fma_f16_e64_dpp v5, -|v1|, v2, -|-1| op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x15,0x48,0xd6,0xe9,0x04,0x06,0xa3,0x01,0x77,0x39,0x05] + +v_fma_f16_e64_dpp v5, v1, -|v2|, -|0.5| op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x26,0x48,0xd6,0xe9,0x04,0xc2,0xc3,0x01,0x77,0x39,0x05] + +v_fma_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 +// GFX11: [0xff,0xc7,0x48,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00] + +v_mad_i16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x78,0x53,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] + +v_mad_i16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x08,0x53,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] + +v_mad_i16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x10,0x53,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] + +v_mad_i16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x20,0x53,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] + +v_mad_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 +// GFX11: [0xff,0xc0,0x53,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] + +v_mad_i32_i16_e64_dpp v5, v1, v2, 0.5 op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x08,0x5a,0xd6,0xe9,0x04,0xc2,0x03,0x01,0x77,0x39,0x05] + +v_mad_i32_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,1,0,0] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 +// GFX11: [0xff,0x90,0x5a,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] + +v_mad_u16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x78,0x41,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] + +v_mad_u16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x08,0x41,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] + +v_mad_u16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x10,0x41,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] + +v_mad_u16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x20,0x41,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] + +v_mad_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 +// GFX11: [0xff,0xc0,0x41,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] + +v_mad_u32_u16_e64_dpp v5, v1, v2, 0.5 op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x08,0x59,0xd6,0xe9,0x04,0xc2,0x03,0x01,0x77,0x39,0x05] + +v_mad_u32_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,1,0,0] clamp 
dpp8:[0,0,0,0,0,0,0,0] fi:1 +// GFX11: [0xff,0x90,0x59,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] + +v_max3_f16_e64_dpp v5, -v1, v2, |exec_lo| op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x7c,0x4c,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05] + +v_max3_f16_e64_dpp v5, -|v1|, -|v2|, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x0b,0x4c,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05] + +v_max3_f16_e64_dpp v5, -|v1|, v2, -|-1| op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x15,0x4c,0xd6,0xe9,0x04,0x06,0xa3,0x01,0x77,0x39,0x05] + +v_max3_f16_e64_dpp v5, v1, -|v2|, -|0.5| op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x26,0x4c,0xd6,0xe9,0x04,0xc2,0xc3,0x01,0x77,0x39,0x05] + +v_max3_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 +// GFX11: [0xff,0xc7,0x4c,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00] + +v_max3_i16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x78,0x4d,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] + +v_max3_i16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x08,0x4d,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] + +v_max3_i16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x10,0x4d,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] + +v_max3_i16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x20,0x4d,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] + +v_max3_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 +// GFX11: [0xff,0x40,0x4d,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] + +v_max3_u16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x78,0x4e,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] + +v_max3_u16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x08,0x4e,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] + +v_max3_u16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x10,0x4e,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] + +v_max3_u16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x20,0x4e,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] + +v_max3_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 +// GFX11: [0xff,0x40,0x4e,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] + +v_med3_f16_e64_dpp v5, -v1, v2, |exec_lo| op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x7c,0x4f,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05] + +v_med3_f16_e64_dpp v5, -|v1|, -|v2|, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x0b,0x4f,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05] + +v_med3_f16_e64_dpp v5, -|v1|, v2, -|-1| op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x15,0x4f,0xd6,0xe9,0x04,0x06,0xa3,0x01,0x77,0x39,0x05] + +v_med3_f16_e64_dpp v5, v1, -|v2|, -|0.5| op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x26,0x4f,0xd6,0xe9,0x04,0xc2,0xc3,0x01,0x77,0x39,0x05] + +v_med3_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 +// GFX11: [0xff,0xc7,0x4f,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00] + +v_med3_i16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x78,0x50,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] + +v_med3_i16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: 
[0x05,0x08,0x50,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] + +v_med3_i16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x10,0x50,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] + +v_med3_i16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x20,0x50,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] + +v_med3_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 +// GFX11: [0xff,0x40,0x50,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] + +v_med3_u16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x78,0x51,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] + +v_med3_u16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x08,0x51,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] + +v_med3_u16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x10,0x51,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] + +v_med3_u16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x20,0x51,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] + +v_med3_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 +// GFX11: [0xff,0x40,0x51,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] + +v_min3_f16_e64_dpp v5, -v1, v2, |exec_lo| op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x7c,0x49,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05] + +v_min3_f16_e64_dpp v5, -|v1|, -|v2|, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x0b,0x49,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05] + +v_min3_f16_e64_dpp v5, -|v1|, v2, -|-1| op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x15,0x49,0xd6,0xe9,0x04,0x06,0xa3,0x01,0x77,0x39,0x05] + +v_min3_f16_e64_dpp v5, v1, -|v2|, -|0.5| op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x26,0x49,0xd6,0xe9,0x04,0xc2,0xc3,0x01,0x77,0x39,0x05] + +v_min3_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 +// GFX11: [0xff,0xc7,0x49,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00] + +v_min3_i16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x78,0x4a,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] + +v_min3_i16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x08,0x4a,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] + +v_min3_i16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x10,0x4a,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] + +v_min3_i16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x20,0x4a,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] + +v_min3_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 +// GFX11: [0xff,0x40,0x4a,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] + +v_min3_u16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x78,0x4b,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] + +v_min3_u16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x08,0x4b,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] + +v_min3_u16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x10,0x4b,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] + +v_min3_u16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x20,0x4b,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] + +v_min3_u16_e64_dpp v255, v255, v255, 
src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 +// GFX11: [0xff,0x40,0x4b,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] + +v_pack_b32_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x0a,0x11,0xd7,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05] + +v_pack_b32_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] dpp8:[0,0,0,0,0,0,0,0] fi:1 +// GFX11: [0xff,0x13,0x11,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] + +v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x58,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x08,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x10,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_sub_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 +// GFX11: [0xff,0xc0,0x0e,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] + +v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x58,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x08,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x10,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_sub_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 +// GFX11: [0xff,0xc0,0x04,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] + +v_dot2_f16_f16_e64_dpp v0, v1, v2, v3 dpp8:[0,1,2,3,4,4,4,4] +// GFX11: encoding: [0x00,0x00,0x66,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0x46,0x92] + +v_dot2_f16_f16_e64_dpp v0, v1, v2, v3 op_sel:[1,1,0,0] dpp8:[0,1,2,3,4,4,4,4] +// GFX11-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid op_sel operand + +v_dot2_f16_f16_e64_dpp v0, s1, v2, v3 dpp8:[0,1,2,3,4,4,4,4] +// GFX11-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +v_dot2_f16_f16_e64_dpp v0, v1, s2, v3 dpp8:[0,1,2,3,4,4,4,4] +// GFX11-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +v_dot2_f16_f16_e64_dpp v0, v1, v2, v3 op_sel:[0,0,1,1] dpp8:[0,1,2,3,4,4,4,4] +// GFX11: encoding: [0x00,0x60,0x66,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0x46,0x92] + +v_dot2_f16_f16_e64_dpp v0, |v1|, -v2, -|s3| op_sel:[0,0,1,1] dpp8:[0,1,2,3,4,4,4,4] +// GFX11: encoding: [0x00,0x65,0x66,0xd6,0xe9,0x04,0x0e,0xc0,0x01,0x88,0x46,0x92] + +v_dot2_f16_f16_e64_dpp v5, v1, v2, 0.5 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: encoding: [0x05,0x00,0x66,0xd6,0xe9,0x04,0xc2,0x03,0x01,0x77,0x39,0x05] + +v_dot2_bf16_bf16_e64_dpp v0, v1, v2, v3 dpp8:[0,1,2,3,4,4,4,4] +// GFX11: encoding: [0x00,0x00,0x67,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0x46,0x92] + +v_dot2_bf16_bf16_e64_dpp v0, v1, v2, v3 op_sel:[1,1,0,0] dpp8:[0,1,2,3,4,4,4,4] +// GFX11-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid op_sel operand + +v_dot2_bf16_bf16_e64_dpp v0, s1, v2, v3 dpp8:[0,1,2,3,4,4,4,4] +// GFX11-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +v_dot2_bf16_bf16_e64_dpp v0, v1, s2, v3 dpp8:[0,1,2,3,4,4,4,4] +// GFX11-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +v_dot2_bf16_bf16_e64_dpp v0, v1, v2, v3 op_sel:[0,0,1,1] dpp8:[0,1,2,3,4,4,4,4] +// GFX11: encoding: [0x00,0x60,0x67,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0x46,0x92] + +v_dot2_bf16_bf16_e64_dpp v0, |v1|, -v2, -|s3| op_sel:[0,0,1,1] 
dpp8:[0,1,2,3,4,4,4,4] +// GFX11: encoding: [0x00,0x65,0x67,0xd6,0xe9,0x04,0x0e,0xc0,0x01,0x88,0x46,0x92] + +v_dot2_bf16_bf16_e64_dpp v5, v1, v2, 0 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: encoding: [0x05,0x00,0x67,0xd6,0xe9,0x04,0x02,0x02,0x01,0x77,0x39,0x05] diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8.s index 2fb95663a2f85..446c08347b3a2 100644 --- a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8.s +++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8.s @@ -1,7 +1,7 @@ -// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32 -show-encoding %s | FileCheck --check-prefixes=GFX11,W32 %s -// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64 -show-encoding %s | FileCheck --check-prefixes=GFX11,W64 %s -// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32 %s 2>&1 | FileCheck --check-prefixes=GFX11-ERR,W32-ERR --implicit-check-not=error: %s -// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64 %s 2>&1 | FileCheck --check-prefixes=GFX11-ERR,W64-ERR --implicit-check-not=error: %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,+real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX11,W32 %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,+real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX11,W64 %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,+real-true16 %s 2>&1 | FileCheck --check-prefixes=GFX11-ERR,W32-ERR --implicit-check-not=error: %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,+real-true16 %s 2>&1 | FileCheck --check-prefixes=GFX11-ERR,W64-ERR --implicit-check-not=error: %s v_add3_u32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] // GFX11: [0x05,0x00,0x55,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop3-fake16.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop3-fake16.s new file mode 100644 index 0000000000000..5329849855de3 --- /dev/null +++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop3-fake16.s @@ -0,0 +1,7294 @@ +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,-real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX12,W32 %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,-real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX12,W64 %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,-real-true16 %s 2>&1 | FileCheck --check-prefix=W32-ERR --implicit-check-not=error: %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,-real-true16 %s 2>&1 | FileCheck --check-prefix=W64-ERR --implicit-check-not=error: %s + +v_add3_u32 v5, v1, v2, s3 +// GFX12: encoding: [0x05,0x00,0x55,0xd6,0x01,0x05,0x0e,0x00] + +v_add3_u32 v5, v255, s2, s105 +// GFX12: encoding: [0x05,0x00,0x55,0xd6,0xff,0x05,0xa4,0x01] + +v_add3_u32 v5, s1, v255, exec_hi +// GFX12: encoding: [0x05,0x00,0x55,0xd6,0x01,0xfe,0xff,0x01] + +v_add3_u32 v5, s105, s105, exec_lo +// GFX12: encoding: [0x05,0x00,0x55,0xd6,0x69,0xd2,0xf8,0x01] + +v_add3_u32 v5, vcc_lo, ttmp15, v3 +// GFX12: encoding: [0x05,0x00,0x55,0xd6,0x6a,0xf6,0x0c,0x04] + +v_add3_u32 v5, vcc_hi, 0xaf123456, v255 +// GFX12: encoding: [0x05,0x00,0x55,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] + +v_add3_u32 v5, ttmp15, src_scc, ttmp15 +// GFX12: encoding: [0x05,0x00,0x55,0xd6,0x7b,0xfa,0xed,0x01] + +v_add3_u32 v5, m0, 0.5, m0 +// GFX12: encoding: [0x05,0x00,0x55,0xd6,0x7d,0xe0,0xf5,0x01] + +v_add3_u32 v5, 
exec_lo, -1, vcc_hi +// GFX12: encoding: [0x05,0x00,0x55,0xd6,0x7e,0x82,0xad,0x01] + +v_add3_u32 v5, exec_hi, null, vcc_lo +// GFX12: encoding: [0x05,0x00,0x55,0xd6,0x7f,0xf8,0xa8,0x01] + +v_add3_u32 v5, null, exec_lo, 0xaf123456 +// GFX12: encoding: [0x05,0x00,0x55,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf] + +v_add3_u32 v5, -1, exec_hi, src_scc +// GFX12: encoding: [0x05,0x00,0x55,0xd6,0xc1,0xfe,0xf4,0x03] + +v_add3_u32 v5, 0.5, m0, 0.5 +// GFX12: encoding: [0x05,0x00,0x55,0xd6,0xf0,0xfa,0xc0,0x03] + +v_add3_u32 v5, src_scc, vcc_lo, -1 +// GFX12: encoding: [0x05,0x00,0x55,0xd6,0xfd,0xd4,0x04,0x03] + +v_add3_u32 v255, 0xaf123456, vcc_hi, null +// GFX12: encoding: [0xff,0x00,0x55,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] + +v_add_co_u32 v5, s6, v1, v2 +// W32: encoding: [0x05,0x06,0x00,0xd7,0x01,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_u32 v5, s6, v255, v255 +// W32: encoding: [0x05,0x06,0x00,0xd7,0xff,0xff,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_u32 v5, s6, s1, s2 +// W32: encoding: [0x05,0x06,0x00,0xd7,0x01,0x04,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_u32 v5, s6, s105, s105 +// W32: encoding: [0x05,0x06,0x00,0xd7,0x69,0xd2,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_u32 v5, s6, vcc_lo, ttmp15 +// W32: encoding: [0x05,0x06,0x00,0xd7,0x6a,0xf6,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_u32 v5, s6, vcc_hi, 0xaf123456 +// W32: encoding: [0x05,0x06,0x00,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_u32 v5, s6, ttmp15, src_scc +// W32: encoding: [0x05,0x06,0x00,0xd7,0x7b,0xfa,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_u32 v5, s6, m0, 0.5 +// W32: encoding: [0x05,0x06,0x00,0xd7,0x7d,0xe0,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_u32 v5, s6, exec_lo, -1 +// W32: encoding: [0x05,0x06,0x00,0xd7,0x7e,0x82,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_u32 v5, s6, exec_hi, null +// W32: encoding: [0x05,0x06,0x00,0xd7,0x7f,0xf8,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_u32 v5, s105, null, exec_lo +// W32: encoding: [0x05,0x69,0x00,0xd7,0x7c,0xfc,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_u32 v5, vcc_lo, -1, exec_hi +// W32: encoding: [0x05,0x6a,0x00,0xd7,0xc1,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_u32 v5, vcc_hi, 0.5, m0 +// W32: encoding: [0x05,0x6b,0x00,0xd7,0xf0,0xfa,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_u32 v5, ttmp15, src_scc, vcc_lo +// W32: encoding: [0x05,0x7b,0x00,0xd7,0xfd,0xd4,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_u32 v5, s[12:13], v1, v2 +// W64: encoding: [0x05,0x0c,0x00,0xd7,0x01,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_u32 v5, s[12:13], v255, v255 +// W64: encoding: [0x05,0x0c,0x00,0xd7,0xff,0xff,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for 
instruction + +v_add_co_u32 v5, s[12:13], s1, s2 +// W64: encoding: [0x05,0x0c,0x00,0xd7,0x01,0x04,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_u32 v5, s[12:13], s105, s105 +// W64: encoding: [0x05,0x0c,0x00,0xd7,0x69,0xd2,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_u32 v5, s[12:13], vcc_lo, ttmp15 +// W64: encoding: [0x05,0x0c,0x00,0xd7,0x6a,0xf6,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_u32 v5, s[12:13], vcc_hi, 0xaf123456 +// W64: encoding: [0x05,0x0c,0x00,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_u32 v5, s[12:13], ttmp15, src_scc +// W64: encoding: [0x05,0x0c,0x00,0xd7,0x7b,0xfa,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_u32 v5, s[12:13], m0, 0.5 +// W64: encoding: [0x05,0x0c,0x00,0xd7,0x7d,0xe0,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_u32 v5, s[12:13], exec_lo, -1 +// W64: encoding: [0x05,0x0c,0x00,0xd7,0x7e,0x82,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_u32 v5, s[12:13], exec_hi, null +// W64: encoding: [0x05,0x0c,0x00,0xd7,0x7f,0xf8,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_u32 v5, s[12:13], null, exec_lo +// W64: encoding: [0x05,0x0c,0x00,0xd7,0x7c,0xfc,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_u32 v5, s[104:105], -1, exec_hi +// W64: encoding: [0x05,0x68,0x00,0xd7,0xc1,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_u32 v5, vcc, 0.5, m0 +// W64: encoding: [0x05,0x6a,0x00,0xd7,0xf0,0xfa,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_u32 v5, ttmp[14:15], src_scc, vcc_lo +// W64: encoding: [0x05,0x7a,0x00,0xd7,0xfd,0xd4,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_u32 v255, null, 0xaf123456, vcc_hi clamp +// GFX12: encoding: [0xff,0xfc,0x00,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] + +v_add_lshl_u32 v5, v1, v2, s3 +// GFX12: encoding: [0x05,0x00,0x47,0xd6,0x01,0x05,0x0e,0x00] + +v_add_lshl_u32 v5, v255, s2, s105 +// GFX12: encoding: [0x05,0x00,0x47,0xd6,0xff,0x05,0xa4,0x01] + +v_add_lshl_u32 v5, s1, v255, exec_hi +// GFX12: encoding: [0x05,0x00,0x47,0xd6,0x01,0xfe,0xff,0x01] + +v_add_lshl_u32 v5, s105, s105, exec_lo +// GFX12: encoding: [0x05,0x00,0x47,0xd6,0x69,0xd2,0xf8,0x01] + +v_add_lshl_u32 v5, vcc_lo, ttmp15, v3 +// GFX12: encoding: [0x05,0x00,0x47,0xd6,0x6a,0xf6,0x0c,0x04] + +v_add_lshl_u32 v5, vcc_hi, 0xaf123456, v255 +// GFX12: encoding: [0x05,0x00,0x47,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] + +v_add_lshl_u32 v5, ttmp15, src_scc, ttmp15 +// GFX12: encoding: [0x05,0x00,0x47,0xd6,0x7b,0xfa,0xed,0x01] + +v_add_lshl_u32 v5, m0, 0.5, m0 +// GFX12: encoding: [0x05,0x00,0x47,0xd6,0x7d,0xe0,0xf5,0x01] + +v_add_lshl_u32 v5, exec_lo, -1, vcc_hi +// GFX12: encoding: [0x05,0x00,0x47,0xd6,0x7e,0x82,0xad,0x01] + +v_add_lshl_u32 v5, exec_hi, null, vcc_lo +// GFX12: encoding: [0x05,0x00,0x47,0xd6,0x7f,0xf8,0xa8,0x01] + +v_add_lshl_u32 v5, null, exec_lo, 0xaf123456 +// GFX12: encoding: [0x05,0x00,0x47,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf] + +v_add_lshl_u32 
v5, -1, exec_hi, src_scc +// GFX12: encoding: [0x05,0x00,0x47,0xd6,0xc1,0xfe,0xf4,0x03] + +v_add_lshl_u32 v5, 0.5, m0, 0.5 +// GFX12: encoding: [0x05,0x00,0x47,0xd6,0xf0,0xfa,0xc0,0x03] + +v_add_lshl_u32 v5, src_scc, vcc_lo, -1 +// GFX12: encoding: [0x05,0x00,0x47,0xd6,0xfd,0xd4,0x04,0x03] + +v_add_lshl_u32 v255, 0xaf123456, vcc_hi, null +// GFX12: encoding: [0xff,0x00,0x47,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] + +v_add_nc_i16 v5, v1, v2 +// GFX12: encoding: [0x05,0x00,0x0d,0xd7,0x01,0x05,0x02,0x00] + +v_add_nc_i16 v5, v255, v255 +// GFX12: encoding: [0x05,0x00,0x0d,0xd7,0xff,0xff,0x03,0x00] + +v_add_nc_i16 v5, s1, s2 +// GFX12: encoding: [0x05,0x00,0x0d,0xd7,0x01,0x04,0x00,0x00] + +v_add_nc_i16 v5, s105, s105 +// GFX12: encoding: [0x05,0x00,0x0d,0xd7,0x69,0xd2,0x00,0x00] + +v_add_nc_i16 v5, vcc_lo, ttmp15 +// GFX12: encoding: [0x05,0x00,0x0d,0xd7,0x6a,0xf6,0x00,0x00] + +v_add_nc_i16 v5, vcc_hi, 0xfe0b +// GFX12: encoding: [0x05,0x00,0x0d,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] + +v_add_nc_i16 v5, ttmp15, src_scc +// GFX12: encoding: [0x05,0x00,0x0d,0xd7,0x7b,0xfa,0x01,0x00] + +v_add_nc_i16 v5, m0, 0.5 +// GFX12: encoding: [0x05,0x00,0x0d,0xd7,0x7d,0xe0,0x01,0x00] + +v_add_nc_i16 v5, exec_lo, -1 +// GFX12: encoding: [0x05,0x00,0x0d,0xd7,0x7e,0x82,0x01,0x00] + +v_add_nc_i16 v5, exec_hi, null +// GFX12: encoding: [0x05,0x00,0x0d,0xd7,0x7f,0xf8,0x00,0x00] + +v_add_nc_i16 v5, null, exec_lo op_sel:[1,1,1] +// GFX12: encoding: [0x05,0x58,0x0d,0xd7,0x7c,0xfc,0x00,0x00] + +v_add_nc_i16 v5, -1, exec_hi op_sel:[0,0,0] +// GFX12: encoding: [0x05,0x00,0x0d,0xd7,0xc1,0xfe,0x00,0x00] + +v_add_nc_i16 v5, 0.5, m0 op_sel:[1,0,0] +// GFX12: encoding: [0x05,0x08,0x0d,0xd7,0xf0,0xfa,0x00,0x00] + +v_add_nc_i16 v5, src_scc, vcc_lo op_sel:[0,1,0] +// GFX12: encoding: [0x05,0x10,0x0d,0xd7,0xfd,0xd4,0x00,0x00] + +v_add_nc_i16 v255, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp +// GFX12: encoding: [0xff,0xc0,0x0d,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] + +v_add_nc_i32 v5, v1, v2 +// GFX12: encoding: [0x05,0x00,0x26,0xd7,0x01,0x05,0x02,0x00] + +v_add_nc_i32 v5, v255, v255 +// GFX12: encoding: [0x05,0x00,0x26,0xd7,0xff,0xff,0x03,0x00] + +v_add_nc_i32 v5, s1, s2 +// GFX12: encoding: [0x05,0x00,0x26,0xd7,0x01,0x04,0x00,0x00] + +v_add_nc_i32 v5, s105, s105 +// GFX12: encoding: [0x05,0x00,0x26,0xd7,0x69,0xd2,0x00,0x00] + +v_add_nc_i32 v5, vcc_lo, ttmp15 +// GFX12: encoding: [0x05,0x00,0x26,0xd7,0x6a,0xf6,0x00,0x00] + +v_add_nc_i32 v5, vcc_hi, 0xaf123456 +// GFX12: encoding: [0x05,0x00,0x26,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] + +v_add_nc_i32 v5, ttmp15, src_scc +// GFX12: encoding: [0x05,0x00,0x26,0xd7,0x7b,0xfa,0x01,0x00] + +v_add_nc_i32 v5, m0, 0.5 +// GFX12: encoding: [0x05,0x00,0x26,0xd7,0x7d,0xe0,0x01,0x00] + +v_add_nc_i32 v5, exec_lo, -1 +// GFX12: encoding: [0x05,0x00,0x26,0xd7,0x7e,0x82,0x01,0x00] + +v_add_nc_i32 v5, exec_hi, null +// GFX12: encoding: [0x05,0x00,0x26,0xd7,0x7f,0xf8,0x00,0x00] + +v_add_nc_i32 v5, null, exec_lo +// GFX12: encoding: [0x05,0x00,0x26,0xd7,0x7c,0xfc,0x00,0x00] + +v_add_nc_i32 v5, -1, exec_hi +// GFX12: encoding: [0x05,0x00,0x26,0xd7,0xc1,0xfe,0x00,0x00] + +v_add_nc_i32 v5, 0.5, m0 +// GFX12: encoding: [0x05,0x00,0x26,0xd7,0xf0,0xfa,0x00,0x00] + +v_add_nc_i32 v5, src_scc, vcc_lo +// GFX12: encoding: [0x05,0x00,0x26,0xd7,0xfd,0xd4,0x00,0x00] + +v_add_nc_i32 v255, 0xaf123456, vcc_hi clamp +// GFX12: encoding: [0xff,0x80,0x26,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] + +v_add_nc_u16 v5, v1, v2 +// GFX12: encoding: [0x05,0x00,0x03,0xd7,0x01,0x05,0x02,0x00] + 
+v_add_nc_u16 v5, v255, v255 +// GFX12: encoding: [0x05,0x00,0x03,0xd7,0xff,0xff,0x03,0x00] + +v_add_nc_u16 v5, s1, s2 +// GFX12: encoding: [0x05,0x00,0x03,0xd7,0x01,0x04,0x00,0x00] + +v_add_nc_u16 v5, s105, s105 +// GFX12: encoding: [0x05,0x00,0x03,0xd7,0x69,0xd2,0x00,0x00] + +v_add_nc_u16 v5, vcc_lo, ttmp15 +// GFX12: encoding: [0x05,0x00,0x03,0xd7,0x6a,0xf6,0x00,0x00] + +v_add_nc_u16 v5, vcc_hi, 0xfe0b +// GFX12: encoding: [0x05,0x00,0x03,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] + +v_add_nc_u16 v5, ttmp15, src_scc +// GFX12: encoding: [0x05,0x00,0x03,0xd7,0x7b,0xfa,0x01,0x00] + +v_add_nc_u16 v5, m0, 0.5 +// GFX12: encoding: [0x05,0x00,0x03,0xd7,0x7d,0xe0,0x01,0x00] + +v_add_nc_u16 v5, exec_lo, -1 +// GFX12: encoding: [0x05,0x00,0x03,0xd7,0x7e,0x82,0x01,0x00] + +v_add_nc_u16 v5, exec_hi, null +// GFX12: encoding: [0x05,0x00,0x03,0xd7,0x7f,0xf8,0x00,0x00] + +v_add_nc_u16 v5, null, exec_lo op_sel:[1,1,1] +// GFX12: encoding: [0x05,0x58,0x03,0xd7,0x7c,0xfc,0x00,0x00] + +v_add_nc_u16 v5, -1, exec_hi op_sel:[0,0,0] +// GFX12: encoding: [0x05,0x00,0x03,0xd7,0xc1,0xfe,0x00,0x00] + +v_add_nc_u16 v5, 0.5, m0 op_sel:[1,0,0] +// GFX12: encoding: [0x05,0x08,0x03,0xd7,0xf0,0xfa,0x00,0x00] + +v_add_nc_u16 v5, src_scc, vcc_lo op_sel:[0,1,0] +// GFX12: encoding: [0x05,0x10,0x03,0xd7,0xfd,0xd4,0x00,0x00] + +v_add_nc_u16 v255, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp +// GFX12: encoding: [0xff,0xc0,0x03,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] + +v_alignbit_b32 v5, v1, v2, s3 +// GFX12: encoding: [0x05,0x00,0x16,0xd6,0x01,0x05,0x0e,0x00] + +v_alignbit_b32 v5, v255, s2, s3 +// GFX12: encoding: [0x05,0x00,0x16,0xd6,0xff,0x05,0x0c,0x00] + +v_alignbit_b32 v5, s1, v255, s3 +// GFX12: encoding: [0x05,0x00,0x16,0xd6,0x01,0xfe,0x0f,0x00] + +v_alignbit_b32 v5, s105, s105, s105 +// GFX12: encoding: [0x05,0x00,0x16,0xd6,0x69,0xd2,0xa4,0x01] + +v_alignbit_b32 v5, vcc_lo, ttmp15, v3 +// GFX12: encoding: [0x05,0x00,0x16,0xd6,0x6a,0xf6,0x0c,0x04] + +v_alignbit_b32 v5, vcc_hi, 0xaf123456, v255 +// GFX12: encoding: [0x05,0x00,0x16,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] + +v_alignbit_b32 v5, ttmp15, src_scc, ttmp15 +// GFX12: encoding: [0x05,0x00,0x16,0xd6,0x7b,0xfa,0xed,0x01] + +v_alignbit_b32 v5, m0, 0.5, exec_lo +// GFX12: encoding: [0x05,0x00,0x16,0xd6,0x7d,0xe0,0xf9,0x01] + +v_alignbit_b32 v5, exec_lo, -1, m0 +// GFX12: encoding: [0x05,0x00,0x16,0xd6,0x7e,0x82,0xf5,0x01] + +v_alignbit_b32 v5, exec_hi, null, vcc_hi +// GFX12: encoding: [0x05,0x00,0x16,0xd6,0x7f,0xf8,0xac,0x01] + +v_alignbit_b32 v5, null, exec_lo, vcc_lo +// GFX12: encoding: [0x05,0x00,0x16,0xd6,0x7c,0xfc,0xa8,0x01] + +v_alignbit_b32 v5, -1, exec_hi, src_scc +// GFX12: encoding: [0x05,0x00,0x16,0xd6,0xc1,0xfe,0xf4,0x03] + +v_alignbit_b32 v5, 0.5, m0, exec_hi +// GFX12: encoding: [0x05,0x00,0x16,0xd6,0xf0,0xfa,0xfc,0x01] + +v_alignbit_b32 v5, src_scc, vcc_lo, -1 +// GFX12: encoding: [0x05,0x00,0x16,0xd6,0xfd,0xd4,0x04,0x03] + +v_alignbit_b32 v255, 0xaf123456, vcc_hi, null +// GFX12: encoding: [0xff,0x00,0x16,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] + +v_alignbyte_b32 v5, v1, v2, s3 +// GFX12: encoding: [0x05,0x00,0x17,0xd6,0x01,0x05,0x0e,0x00] + +v_alignbyte_b32 v5, v255, s2, s3 +// GFX12: encoding: [0x05,0x00,0x17,0xd6,0xff,0x05,0x0c,0x00] + +v_alignbyte_b32 v5, s1, v255, s3 +// GFX12: encoding: [0x05,0x00,0x17,0xd6,0x01,0xfe,0x0f,0x00] + +v_alignbyte_b32 v5, s105, s105, s105 +// GFX12: encoding: [0x05,0x00,0x17,0xd6,0x69,0xd2,0xa4,0x01] + +v_alignbyte_b32 v5, vcc_lo, ttmp15, v3 +// GFX12: encoding: 
[0x05,0x00,0x17,0xd6,0x6a,0xf6,0x0c,0x04] + +v_alignbyte_b32 v5, vcc_hi, 0xaf123456, v255 +// GFX12: encoding: [0x05,0x00,0x17,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] + +v_alignbyte_b32 v5, ttmp15, src_scc, ttmp15 +// GFX12: encoding: [0x05,0x00,0x17,0xd6,0x7b,0xfa,0xed,0x01] + +v_alignbyte_b32 v5, m0, 0.5, exec_lo +// GFX12: encoding: [0x05,0x00,0x17,0xd6,0x7d,0xe0,0xf9,0x01] + +v_alignbyte_b32 v5, exec_lo, -1, m0 +// GFX12: encoding: [0x05,0x00,0x17,0xd6,0x7e,0x82,0xf5,0x01] + +v_alignbyte_b32 v5, exec_hi, null, vcc_hi +// GFX12: encoding: [0x05,0x00,0x17,0xd6,0x7f,0xf8,0xac,0x01] + +v_alignbyte_b32 v5, null, exec_lo, vcc_lo +// GFX12: encoding: [0x05,0x00,0x17,0xd6,0x7c,0xfc,0xa8,0x01] + +v_alignbyte_b32 v5, -1, exec_hi, src_scc +// GFX12: encoding: [0x05,0x00,0x17,0xd6,0xc1,0xfe,0xf4,0x03] + +v_alignbyte_b32 v5, 0.5, m0, exec_hi +// GFX12: encoding: [0x05,0x00,0x17,0xd6,0xf0,0xfa,0xfc,0x01] + +v_alignbyte_b32 v5, src_scc, vcc_lo, -1 +// GFX12: encoding: [0x05,0x00,0x17,0xd6,0xfd,0xd4,0x04,0x03] + +v_alignbyte_b32 v255, 0xaf123456, vcc_hi, null +// GFX12: encoding: [0xff,0x00,0x17,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] + +v_and_b16 v5, v1, v2 +// GFX12: encoding: [0x05,0x00,0x62,0xd7,0x01,0x05,0x02,0x00] + +v_and_b16 v5, v255, v255 +// GFX12: encoding: [0x05,0x00,0x62,0xd7,0xff,0xff,0x03,0x00] + +v_and_b16 v5, s1, s2 +// GFX12: encoding: [0x05,0x00,0x62,0xd7,0x01,0x04,0x00,0x00] + +v_and_b16 v5, s105, s105 +// GFX12: encoding: [0x05,0x00,0x62,0xd7,0x69,0xd2,0x00,0x00] + +v_and_b16 v5, vcc_lo, ttmp15 +// GFX12: encoding: [0x05,0x00,0x62,0xd7,0x6a,0xf6,0x00,0x00] + +v_and_b16 v5, vcc_hi, 0xfe0b +// GFX12: encoding: [0x05,0x00,0x62,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] + +v_and_b16 v5, ttmp15, src_scc +// GFX12: encoding: [0x05,0x00,0x62,0xd7,0x7b,0xfa,0x01,0x00] + +v_and_b16 v5, m0, 0.5 +// GFX12: encoding: [0x05,0x00,0x62,0xd7,0x7d,0xe0,0x01,0x00] + +v_and_b16 v5, exec_lo, -1 +// GFX12: encoding: [0x05,0x00,0x62,0xd7,0x7e,0x82,0x01,0x00] + +v_and_b16 v5, exec_hi, null +// GFX12: encoding: [0x05,0x00,0x62,0xd7,0x7f,0xf8,0x00,0x00] + +v_and_b16 v5, null, exec_lo +// GFX12: encoding: [0x05,0x00,0x62,0xd7,0x7c,0xfc,0x00,0x00] + +v_and_b16 v5, -1, exec_hi +// GFX12: encoding: [0x05,0x00,0x62,0xd7,0xc1,0xfe,0x00,0x00] + +v_and_b16 v5, 0.5, m0 +// GFX12: encoding: [0x05,0x00,0x62,0xd7,0xf0,0xfa,0x00,0x00] + +v_and_b16 v5, src_scc, vcc_lo +// GFX12: encoding: [0x05,0x00,0x62,0xd7,0xfd,0xd4,0x00,0x00] + +v_and_b16 v255, 0xfe0b, vcc_hi +// GFX12: encoding: [0xff,0x00,0x62,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] + +v_and_or_b32 v5, v1, v2, s3 +// GFX12: encoding: [0x05,0x00,0x57,0xd6,0x01,0x05,0x0e,0x00] + +v_and_or_b32 v5, v255, s2, s105 +// GFX12: encoding: [0x05,0x00,0x57,0xd6,0xff,0x05,0xa4,0x01] + +v_and_or_b32 v5, s1, v255, exec_hi +// GFX12: encoding: [0x05,0x00,0x57,0xd6,0x01,0xfe,0xff,0x01] + +v_and_or_b32 v5, s105, s105, exec_lo +// GFX12: encoding: [0x05,0x00,0x57,0xd6,0x69,0xd2,0xf8,0x01] + +v_and_or_b32 v5, vcc_lo, ttmp15, v3 +// GFX12: encoding: [0x05,0x00,0x57,0xd6,0x6a,0xf6,0x0c,0x04] + +v_and_or_b32 v5, vcc_hi, 0xaf123456, v255 +// GFX12: encoding: [0x05,0x00,0x57,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] + +v_and_or_b32 v5, ttmp15, src_scc, ttmp15 +// GFX12: encoding: [0x05,0x00,0x57,0xd6,0x7b,0xfa,0xed,0x01] + +v_and_or_b32 v5, m0, 0.5, m0 +// GFX12: encoding: [0x05,0x00,0x57,0xd6,0x7d,0xe0,0xf5,0x01] + +v_and_or_b32 v5, exec_lo, -1, vcc_hi +// GFX12: encoding: [0x05,0x00,0x57,0xd6,0x7e,0x82,0xad,0x01] + +v_and_or_b32 v5, exec_hi, null, vcc_lo +// 
GFX12: encoding: [0x05,0x00,0x57,0xd6,0x7f,0xf8,0xa8,0x01] + +v_and_or_b32 v5, null, exec_lo, 0xaf123456 +// GFX12: encoding: [0x05,0x00,0x57,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf] + +v_and_or_b32 v5, -1, exec_hi, src_scc +// GFX12: encoding: [0x05,0x00,0x57,0xd6,0xc1,0xfe,0xf4,0x03] + +v_and_or_b32 v5, 0.5, m0, 0.5 +// GFX12: encoding: [0x05,0x00,0x57,0xd6,0xf0,0xfa,0xc0,0x03] + +v_and_or_b32 v5, src_scc, vcc_lo, -1 +// GFX12: encoding: [0x05,0x00,0x57,0xd6,0xfd,0xd4,0x04,0x03] + +v_and_or_b32 v255, 0xaf123456, vcc_hi, null +// GFX12: encoding: [0xff,0x00,0x57,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] + +v_ashrrev_i16 v5, v1, v2 +// GFX12: encoding: [0x05,0x00,0x3a,0xd7,0x01,0x05,0x02,0x00] + +v_ashrrev_i16 v5, v255, v255 +// GFX12: encoding: [0x05,0x00,0x3a,0xd7,0xff,0xff,0x03,0x00] + +v_ashrrev_i16 v5, s1, s2 +// GFX12: encoding: [0x05,0x00,0x3a,0xd7,0x01,0x04,0x00,0x00] + +v_ashrrev_i16 v5, s105, s105 +// GFX12: encoding: [0x05,0x00,0x3a,0xd7,0x69,0xd2,0x00,0x00] + +v_ashrrev_i16 v5, vcc_lo, ttmp15 +// GFX12: encoding: [0x05,0x00,0x3a,0xd7,0x6a,0xf6,0x00,0x00] + +v_ashrrev_i16 v5, vcc_hi, 0xfe0b +// GFX12: encoding: [0x05,0x00,0x3a,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] + +v_ashrrev_i16 v5, ttmp15, src_scc +// GFX12: encoding: [0x05,0x00,0x3a,0xd7,0x7b,0xfa,0x01,0x00] + +v_ashrrev_i16 v5, m0, 0.5 +// GFX12: encoding: [0x05,0x00,0x3a,0xd7,0x7d,0xe0,0x01,0x00] + +v_ashrrev_i16 v5, exec_lo, -1 +// GFX12: encoding: [0x05,0x00,0x3a,0xd7,0x7e,0x82,0x01,0x00] + +v_ashrrev_i16 v5, exec_hi, null +// GFX12: encoding: [0x05,0x00,0x3a,0xd7,0x7f,0xf8,0x00,0x00] + +v_ashrrev_i16 v5, null, exec_lo +// GFX12: encoding: [0x05,0x00,0x3a,0xd7,0x7c,0xfc,0x00,0x00] + +v_ashrrev_i16 v5, -1, exec_hi +// GFX12: encoding: [0x05,0x00,0x3a,0xd7,0xc1,0xfe,0x00,0x00] + +v_ashrrev_i16 v5, 0.5, m0 +// GFX12: encoding: [0x05,0x00,0x3a,0xd7,0xf0,0xfa,0x00,0x00] + +v_ashrrev_i16 v5, src_scc, vcc_lo +// GFX12: encoding: [0x05,0x00,0x3a,0xd7,0xfd,0xd4,0x00,0x00] + +v_ashrrev_i16 v255, 0xfe0b, vcc_hi +// GFX12: encoding: [0xff,0x00,0x3a,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] + +v_ashrrev_i64 v[5:6], v1, vcc +// GFX12: encoding: [0x05,0x00,0x3e,0xd7,0x01,0xd5,0x00,0x00] + +v_ashrrev_i64 v[5:6], v255, exec +// GFX12: encoding: [0x05,0x00,0x3e,0xd7,0xff,0xfd,0x00,0x00] + +v_ashrrev_i64 v[5:6], exec_lo, v[2:3] +// GFX12: encoding: [0x05,0x00,0x3e,0xd7,0x7e,0x04,0x02,0x00] + +v_ashrrev_i64 v[5:6], exec_hi, v[254:255] +// GFX12: encoding: [0x05,0x00,0x3e,0xd7,0x7f,0xfc,0x03,0x00] + +v_ashrrev_i64 v[5:6], null, null +// GFX12: encoding: [0x05,0x00,0x3e,0xd7,0x7c,0xf8,0x00,0x00] + +v_ashrrev_i64 v[5:6], -1, -1 +// GFX12: encoding: [0x05,0x00,0x3e,0xd7,0xc1,0x82,0x01,0x00] + +v_ashrrev_i64 v[5:6], 0.5, 0xaf123456 +// GFX12: encoding: [0x05,0x00,0x3e,0xd7,0xf0,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] + +v_ashrrev_i64 v[5:6], src_scc, src_scc +// GFX12: encoding: [0x05,0x00,0x3e,0xd7,0xfd,0xfa,0x01,0x00] + +v_ashrrev_i64 v[254:255], 0xaf123456, 0.5 +// GFX12: encoding: [0xfe,0x00,0x3e,0xd7,0xff,0xe0,0x01,0x00,0x56,0x34,0x12,0xaf] + +v_bcnt_u32_b32 v5, v1, v2 +// GFX12: encoding: [0x05,0x00,0x1e,0xd7,0x01,0x05,0x02,0x00] + +v_bcnt_u32_b32 v5, v255, v255 +// GFX12: encoding: [0x05,0x00,0x1e,0xd7,0xff,0xff,0x03,0x00] + +v_bcnt_u32_b32 v5, s1, s2 +// GFX12: encoding: [0x05,0x00,0x1e,0xd7,0x01,0x04,0x00,0x00] + +v_bcnt_u32_b32 v5, s105, s105 +// GFX12: encoding: [0x05,0x00,0x1e,0xd7,0x69,0xd2,0x00,0x00] + +v_bcnt_u32_b32 v5, vcc_lo, ttmp15 +// GFX12: encoding: [0x05,0x00,0x1e,0xd7,0x6a,0xf6,0x00,0x00] + 
+v_bcnt_u32_b32 v5, vcc_hi, 0xaf123456 +// GFX12: encoding: [0x05,0x00,0x1e,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] + +v_bcnt_u32_b32 v5, ttmp15, src_scc +// GFX12: encoding: [0x05,0x00,0x1e,0xd7,0x7b,0xfa,0x01,0x00] + +v_bcnt_u32_b32 v5, m0, 0.5 +// GFX12: encoding: [0x05,0x00,0x1e,0xd7,0x7d,0xe0,0x01,0x00] + +v_bcnt_u32_b32 v5, exec_lo, -1 +// GFX12: encoding: [0x05,0x00,0x1e,0xd7,0x7e,0x82,0x01,0x00] + +v_bcnt_u32_b32 v5, exec_hi, null +// GFX12: encoding: [0x05,0x00,0x1e,0xd7,0x7f,0xf8,0x00,0x00] + +v_bcnt_u32_b32 v5, null, exec_lo +// GFX12: encoding: [0x05,0x00,0x1e,0xd7,0x7c,0xfc,0x00,0x00] + +v_bcnt_u32_b32 v5, -1, exec_hi +// GFX12: encoding: [0x05,0x00,0x1e,0xd7,0xc1,0xfe,0x00,0x00] + +v_bcnt_u32_b32 v5, 0.5, m0 +// GFX12: encoding: [0x05,0x00,0x1e,0xd7,0xf0,0xfa,0x00,0x00] + +v_bcnt_u32_b32 v5, src_scc, vcc_lo +// GFX12: encoding: [0x05,0x00,0x1e,0xd7,0xfd,0xd4,0x00,0x00] + +v_bcnt_u32_b32 v255, 0xaf123456, vcc_hi +// GFX12: encoding: [0xff,0x00,0x1e,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] + +v_bfe_i32 v5, v1, v2, s3 +// GFX12: encoding: [0x05,0x00,0x11,0xd6,0x01,0x05,0x0e,0x00] + +v_bfe_i32 v5, v255, s2, s105 +// GFX12: encoding: [0x05,0x00,0x11,0xd6,0xff,0x05,0xa4,0x01] + +v_bfe_i32 v5, s1, v255, exec_hi +// GFX12: encoding: [0x05,0x00,0x11,0xd6,0x01,0xfe,0xff,0x01] + +v_bfe_i32 v5, s105, s105, exec_lo +// GFX12: encoding: [0x05,0x00,0x11,0xd6,0x69,0xd2,0xf8,0x01] + +v_bfe_i32 v5, vcc_lo, ttmp15, v3 +// GFX12: encoding: [0x05,0x00,0x11,0xd6,0x6a,0xf6,0x0c,0x04] + +v_bfe_i32 v5, vcc_hi, 0xaf123456, v255 +// GFX12: encoding: [0x05,0x00,0x11,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] + +v_bfe_i32 v5, ttmp15, src_scc, ttmp15 +// GFX12: encoding: [0x05,0x00,0x11,0xd6,0x7b,0xfa,0xed,0x01] + +v_bfe_i32 v5, m0, 0.5, m0 +// GFX12: encoding: [0x05,0x00,0x11,0xd6,0x7d,0xe0,0xf5,0x01] + +v_bfe_i32 v5, exec_lo, -1, vcc_hi +// GFX12: encoding: [0x05,0x00,0x11,0xd6,0x7e,0x82,0xad,0x01] + +v_bfe_i32 v5, exec_hi, null, vcc_lo +// GFX12: encoding: [0x05,0x00,0x11,0xd6,0x7f,0xf8,0xa8,0x01] + +v_bfe_i32 v5, null, exec_lo, 0xaf123456 +// GFX12: encoding: [0x05,0x00,0x11,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf] + +v_bfe_i32 v5, -1, exec_hi, src_scc +// GFX12: encoding: [0x05,0x00,0x11,0xd6,0xc1,0xfe,0xf4,0x03] + +v_bfe_i32 v5, 0.5, m0, 0.5 +// GFX12: encoding: [0x05,0x00,0x11,0xd6,0xf0,0xfa,0xc0,0x03] + +v_bfe_i32 v5, src_scc, vcc_lo, -1 +// GFX12: encoding: [0x05,0x00,0x11,0xd6,0xfd,0xd4,0x04,0x03] + +v_bfe_i32 v255, 0xaf123456, vcc_hi, null +// GFX12: encoding: [0xff,0x00,0x11,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] + +v_bfe_u32 v5, v1, v2, s3 +// GFX12: encoding: [0x05,0x00,0x10,0xd6,0x01,0x05,0x0e,0x00] + +v_bfe_u32 v5, v255, s2, s105 +// GFX12: encoding: [0x05,0x00,0x10,0xd6,0xff,0x05,0xa4,0x01] + +v_bfe_u32 v5, s1, v255, exec_hi +// GFX12: encoding: [0x05,0x00,0x10,0xd6,0x01,0xfe,0xff,0x01] + +v_bfe_u32 v5, s105, s105, exec_lo +// GFX12: encoding: [0x05,0x00,0x10,0xd6,0x69,0xd2,0xf8,0x01] + +v_bfe_u32 v5, vcc_lo, ttmp15, v3 +// GFX12: encoding: [0x05,0x00,0x10,0xd6,0x6a,0xf6,0x0c,0x04] + +v_bfe_u32 v5, vcc_hi, 0xaf123456, v255 +// GFX12: encoding: [0x05,0x00,0x10,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] + +v_bfe_u32 v5, ttmp15, src_scc, ttmp15 +// GFX12: encoding: [0x05,0x00,0x10,0xd6,0x7b,0xfa,0xed,0x01] + +v_bfe_u32 v5, m0, 0.5, m0 +// GFX12: encoding: [0x05,0x00,0x10,0xd6,0x7d,0xe0,0xf5,0x01] + +v_bfe_u32 v5, exec_lo, -1, vcc_hi +// GFX12: encoding: [0x05,0x00,0x10,0xd6,0x7e,0x82,0xad,0x01] + +v_bfe_u32 v5, exec_hi, null, vcc_lo +// GFX12: encoding: 
[0x05,0x00,0x10,0xd6,0x7f,0xf8,0xa8,0x01] + +v_bfe_u32 v5, null, exec_lo, 0xaf123456 +// GFX12: encoding: [0x05,0x00,0x10,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf] + +v_bfe_u32 v5, -1, exec_hi, src_scc +// GFX12: encoding: [0x05,0x00,0x10,0xd6,0xc1,0xfe,0xf4,0x03] + +v_bfe_u32 v5, 0.5, m0, 0.5 +// GFX12: encoding: [0x05,0x00,0x10,0xd6,0xf0,0xfa,0xc0,0x03] + +v_bfe_u32 v5, src_scc, vcc_lo, -1 +// GFX12: encoding: [0x05,0x00,0x10,0xd6,0xfd,0xd4,0x04,0x03] + +v_bfe_u32 v255, 0xaf123456, vcc_hi, null +// GFX12: encoding: [0xff,0x00,0x10,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] + +v_bfi_b32 v5, v1, v2, s3 +// GFX12: encoding: [0x05,0x00,0x12,0xd6,0x01,0x05,0x0e,0x00] + +v_bfi_b32 v5, v255, s2, s105 +// GFX12: encoding: [0x05,0x00,0x12,0xd6,0xff,0x05,0xa4,0x01] + +v_bfi_b32 v5, s1, v255, exec_hi +// GFX12: encoding: [0x05,0x00,0x12,0xd6,0x01,0xfe,0xff,0x01] + +v_bfi_b32 v5, s105, s105, exec_lo +// GFX12: encoding: [0x05,0x00,0x12,0xd6,0x69,0xd2,0xf8,0x01] + +v_bfi_b32 v5, vcc_lo, ttmp15, v3 +// GFX12: encoding: [0x05,0x00,0x12,0xd6,0x6a,0xf6,0x0c,0x04] + +v_bfi_b32 v5, vcc_hi, 0xaf123456, v255 +// GFX12: encoding: [0x05,0x00,0x12,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] + +v_bfi_b32 v5, ttmp15, src_scc, ttmp15 +// GFX12: encoding: [0x05,0x00,0x12,0xd6,0x7b,0xfa,0xed,0x01] + +v_bfi_b32 v5, m0, 0.5, m0 +// GFX12: encoding: [0x05,0x00,0x12,0xd6,0x7d,0xe0,0xf5,0x01] + +v_bfi_b32 v5, exec_lo, -1, vcc_hi +// GFX12: encoding: [0x05,0x00,0x12,0xd6,0x7e,0x82,0xad,0x01] + +v_bfi_b32 v5, exec_hi, null, vcc_lo +// GFX12: encoding: [0x05,0x00,0x12,0xd6,0x7f,0xf8,0xa8,0x01] + +v_bfi_b32 v5, null, exec_lo, 0xaf123456 +// GFX12: encoding: [0x05,0x00,0x12,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf] + +v_bfi_b32 v5, -1, exec_hi, src_scc +// GFX12: encoding: [0x05,0x00,0x12,0xd6,0xc1,0xfe,0xf4,0x03] + +v_bfi_b32 v5, 0.5, m0, 0.5 +// GFX12: encoding: [0x05,0x00,0x12,0xd6,0xf0,0xfa,0xc0,0x03] + +v_bfi_b32 v5, src_scc, vcc_lo, -1 +// GFX12: encoding: [0x05,0x00,0x12,0xd6,0xfd,0xd4,0x04,0x03] + +v_bfi_b32 v255, 0xaf123456, vcc_hi, null +// GFX12: encoding: [0xff,0x00,0x12,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] + +v_bfm_b32 v5, v1, v2 +// GFX12: encoding: [0x05,0x00,0x1d,0xd7,0x01,0x05,0x02,0x00] + +v_bfm_b32 v5, v255, v255 +// GFX12: encoding: [0x05,0x00,0x1d,0xd7,0xff,0xff,0x03,0x00] + +v_bfm_b32 v5, s1, s2 +// GFX12: encoding: [0x05,0x00,0x1d,0xd7,0x01,0x04,0x00,0x00] + +v_bfm_b32 v5, s105, s105 +// GFX12: encoding: [0x05,0x00,0x1d,0xd7,0x69,0xd2,0x00,0x00] + +v_bfm_b32 v5, vcc_lo, ttmp15 +// GFX12: encoding: [0x05,0x00,0x1d,0xd7,0x6a,0xf6,0x00,0x00] + +v_bfm_b32 v5, vcc_hi, 0xaf123456 +// GFX12: encoding: [0x05,0x00,0x1d,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] + +v_bfm_b32 v5, ttmp15, src_scc +// GFX12: encoding: [0x05,0x00,0x1d,0xd7,0x7b,0xfa,0x01,0x00] + +v_bfm_b32 v5, m0, 0.5 +// GFX12: encoding: [0x05,0x00,0x1d,0xd7,0x7d,0xe0,0x01,0x00] + +v_bfm_b32 v5, exec_lo, -1 +// GFX12: encoding: [0x05,0x00,0x1d,0xd7,0x7e,0x82,0x01,0x00] + +v_bfm_b32 v5, exec_hi, null +// GFX12: encoding: [0x05,0x00,0x1d,0xd7,0x7f,0xf8,0x00,0x00] + +v_bfm_b32 v5, null, exec_lo +// GFX12: encoding: [0x05,0x00,0x1d,0xd7,0x7c,0xfc,0x00,0x00] + +v_bfm_b32 v5, -1, exec_hi +// GFX12: encoding: [0x05,0x00,0x1d,0xd7,0xc1,0xfe,0x00,0x00] + +v_bfm_b32 v5, 0.5, m0 +// GFX12: encoding: [0x05,0x00,0x1d,0xd7,0xf0,0xfa,0x00,0x00] + +v_bfm_b32 v5, src_scc, vcc_lo +// GFX12: encoding: [0x05,0x00,0x1d,0xd7,0xfd,0xd4,0x00,0x00] + +v_bfm_b32 v255, 0xaf123456, vcc_hi +// GFX12: encoding: 
[0xff,0x00,0x1d,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf]
+
+v_cndmask_b16 v5, v1, src_scc, s3
+// W32: encoding: [0x05,0x00,0x5d,0xd6,0x01,0xfb,0x0d,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cndmask_b16 v5, v255, 0.5, s3
+// W32: encoding: [0x05,0x00,0x5d,0xd6,0xff,0xe1,0x0d,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cndmask_b16 v5, s105, s105, s3
+// W32: encoding: [0x05,0x00,0x5d,0xd6,0x69,0xd2,0x0c,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cndmask_b16 v5, vcc_hi, v2, s3
+// W32: encoding: [0x05,0x00,0x5d,0xd6,0x6b,0x04,0x0e,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cndmask_b16 v5, ttmp15, ttmp15, s3
+// W32: encoding: [0x05,0x00,0x5d,0xd6,0x7b,0xf6,0x0c,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cndmask_b16 v5, m0, v255, s3
+// W32: encoding: [0x05,0x00,0x5d,0xd6,0x7d,0xfe,0x0f,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cndmask_b16 v5, exec_lo, exec_lo, s3
+// W32: encoding: [0x05,0x00,0x5d,0xd6,0x7e,0xfc,0x0c,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cndmask_b16 v5, exec_hi, exec_hi, s3
+// W32: encoding: [0x05,0x00,0x5d,0xd6,0x7f,0xfe,0x0c,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cndmask_b16 v5, null, m0, s105
+// W32: encoding: [0x05,0x00,0x5d,0xd6,0x7c,0xfa,0xa4,0x01]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cndmask_b16 v5, -1, -|vcc_lo|, vcc_lo
+// W32: encoding: [0x05,0x02,0x5d,0xd6,0xc1,0xd4,0xa8,0x41]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cndmask_b16 v5, 0.5, -1, vcc_hi
+// W32: encoding: [0x05,0x00,0x5d,0xd6,0xf0,0x82,0xad,0x01]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cndmask_b16 v5, -|src_scc|, null, ttmp15
+// W32: encoding: [0x05,0x01,0x5d,0xd6,0xfd,0xf8,0xec,0x21]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cndmask_b16 v5, v1, src_scc, s[6:7]
+// W64: encoding: [0x05,0x00,0x5d,0xd6,0x01,0xfb,0x19,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cndmask_b16 v5, v255, 0.5, s[6:7]
+// W64: encoding: [0x05,0x00,0x5d,0xd6,0xff,0xe1,0x19,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cndmask_b16 v5, s105, s105, s[6:7]
+// W64: encoding: [0x05,0x00,0x5d,0xd6,0x69,0xd2,0x18,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cndmask_b16 v5, vcc_hi, v2, s[6:7]
+// W64: encoding: [0x05,0x00,0x5d,0xd6,0x6b,0x04,0x1a,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cndmask_b16 v5, ttmp15, ttmp15, s[6:7]
+// W64: encoding: [0x05,0x00,0x5d,0xd6,0x7b,0xf6,0x18,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cndmask_b16 v5, m0, v255, s[6:7]
+// W64: encoding: [0x05,0x00,0x5d,0xd6,0x7d,0xfe,0x1b,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cndmask_b16 v5, exec_lo, exec_lo, s[6:7]
+// W64: encoding: [0x05,0x00,0x5d,0xd6,0x7e,0xfc,0x18,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cndmask_b16 v5, exec_hi, exec_hi, s[6:7]
+// W64: encoding: [0x05,0x00,0x5d,0xd6,0x7f,0xfe,0x18,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cndmask_b16 v5, null, m0, s[6:7]
+// W64: encoding: [0x05,0x00,0x5d,0xd6,0x7c,0xfa,0x18,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cndmask_b16 v5, -1, -|vcc_lo|, s[104:105]
+// W64: encoding: [0x05,0x02,0x5d,0xd6,0xc1,0xd4,0xa0,0x41]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cndmask_b16 v5, 0.5, -1, vcc
+// W64: encoding: [0x05,0x00,0x5d,0xd6,0xf0,0x82,0xa9,0x01]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cndmask_b16 v5, -|src_scc|, null, ttmp[14:15]
+// W64: encoding: [0x05,0x01,0x5d,0xd6,0xfd,0xf8,0xe8,0x21]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cndmask_b16 v255, -|0xfe0b|, -|vcc_hi|, null
+// GFX12: encoding: [0xff,0x03,0x5d,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00]
+
+v_cubeid_f32 v5, v1, v2, s3
+// GFX12: encoding: [0x05,0x00,0x0c,0xd6,0x01,0x05,0x0e,0x00]
+
+v_cubeid_f32 v5, v255, s2, s105
+// GFX12: encoding: [0x05,0x00,0x0c,0xd6,0xff,0x05,0xa4,0x01]
+
+v_cubeid_f32 v5, s1, v255, exec_hi
+// GFX12: encoding: [0x05,0x00,0x0c,0xd6,0x01,0xfe,0xff,0x01]
+
+v_cubeid_f32 v5, s105, s105, exec_lo
+// GFX12: encoding: [0x05,0x00,0x0c,0xd6,0x69,0xd2,0xf8,0x01]
+
+v_cubeid_f32 v5, vcc_lo, ttmp15, v3
+// GFX12: encoding: [0x05,0x00,0x0c,0xd6,0x6a,0xf6,0x0c,0x04]
+
+v_cubeid_f32 v5, vcc_hi, 0xaf123456, v255
+// GFX12: encoding: [0x05,0x00,0x0c,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf]
+
+v_cubeid_f32 v5, -|ttmp15|, -|src_scc|, -|ttmp15|
+// GFX12: encoding: [0x05,0x07,0x0c,0xd6,0x7b,0xfa,0xed,0xe1]
+
+v_cubeid_f32 v5, m0, 0.5, m0
+// GFX12: encoding: [0x05,0x00,0x0c,0xd6,0x7d,0xe0,0xf5,0x01]
+
+v_cubeid_f32 v5, |exec_lo|, -1, vcc_hi
+// GFX12: encoding: [0x05,0x01,0x0c,0xd6,0x7e,0x82,0xad,0x01]
+
+v_cubeid_f32 v5, -|exec_hi|, null, -|vcc_lo|
+// GFX12: encoding: [0x05,0x05,0x0c,0xd6,0x7f,0xf8,0xa8,0xa1]
+
+v_cubeid_f32 v5, null, exec_lo, -|0xaf123456|
+// GFX12: encoding: [0x05,0x04,0x0c,0xd6,0x7c,0xfc,0xfc,0x83,0x56,0x34,0x12,0xaf]
+
+v_cubeid_f32 v5, -1, -|exec_hi|, -|src_scc|
+// GFX12: encoding: [0x05,0x06,0x0c,0xd6,0xc1,0xfe,0xf4,0xc3]
+
+v_cubeid_f32 v5, 0.5, -m0, 0.5 mul:2
+// GFX12: encoding: [0x05,0x00,0x0c,0xd6,0xf0,0xfa,0xc0,0x4b]
+
+v_cubeid_f32 v5, -src_scc, |vcc_lo|, -1 mul:4
+// GFX12: encoding: [0x05,0x02,0x0c,0xd6,0xfd,0xd4,0x04,0x33]
+
+v_cubeid_f32 v255, -|0xaf123456|, -|vcc_hi|, null clamp div:2
+// GFX12: encoding: [0xff,0x83,0x0c,0xd6,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf]
+
+v_cubema_f32 v5, v1, v2, s3
+// GFX12: encoding: [0x05,0x00,0x0f,0xd6,0x01,0x05,0x0e,0x00]
+
+v_cubema_f32 v5, v255, s2, s105
+// GFX12: encoding: [0x05,0x00,0x0f,0xd6,0xff,0x05,0xa4,0x01]
+
+v_cubema_f32 v5, s1, v255, exec_hi
+// GFX12: encoding: [0x05,0x00,0x0f,0xd6,0x01,0xfe,0xff,0x01]
+
+v_cubema_f32 v5, s105, s105, exec_lo
+// GFX12: encoding: [0x05,0x00,0x0f,0xd6,0x69,0xd2,0xf8,0x01]
+
+v_cubema_f32 v5, vcc_lo, ttmp15, v3
+// GFX12: encoding: [0x05,0x00,0x0f,0xd6,0x6a,0xf6,0x0c,0x04]
+
+v_cubema_f32 v5, vcc_hi, 0xaf123456, v255
+// GFX12: encoding: [0x05,0x00,0x0f,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf]
+
+v_cubema_f32 v5, -|ttmp15|, -|src_scc|, -|ttmp15|
+// GFX12: encoding: [0x05,0x07,0x0f,0xd6,0x7b,0xfa,0xed,0xe1]
+
+v_cubema_f32 v5, m0, 0.5, m0
+// GFX12: encoding: [0x05,0x00,0x0f,0xd6,0x7d,0xe0,0xf5,0x01]
+
+v_cubema_f32 v5, |exec_lo|, -1, vcc_hi
+// GFX12: encoding: [0x05,0x01,0x0f,0xd6,0x7e,0x82,0xad,0x01]
+
+v_cubema_f32 v5, -|exec_hi|, null, -|vcc_lo|
+// GFX12: encoding: [0x05,0x05,0x0f,0xd6,0x7f,0xf8,0xa8,0xa1]
+
+v_cubema_f32 v5, null, exec_lo, -|0xaf123456|
+// GFX12: encoding: [0x05,0x04,0x0f,0xd6,0x7c,0xfc,0xfc,0x83,0x56,0x34,0x12,0xaf]
+
+v_cubema_f32 v5, -1, -|exec_hi|, -|src_scc|
+// GFX12: encoding: [0x05,0x06,0x0f,0xd6,0xc1,0xfe,0xf4,0xc3]
+
+v_cubema_f32 v5, 0.5, -m0, 0.5 mul:2
+// GFX12: encoding: [0x05,0x00,0x0f,0xd6,0xf0,0xfa,0xc0,0x4b]
+
+v_cubema_f32 v5, -src_scc, |vcc_lo|, -1 mul:4
+// GFX12: encoding: [0x05,0x02,0x0f,0xd6,0xfd,0xd4,0x04,0x33]
+
+v_cubema_f32 v255, -|0xaf123456|, -|vcc_hi|, null clamp div:2
+// GFX12: encoding: [0xff,0x83,0x0f,0xd6,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf]
+
+v_cubesc_f32 v5, v1, v2, s3
+// GFX12: encoding: [0x05,0x00,0x0d,0xd6,0x01,0x05,0x0e,0x00]
+
+v_cubesc_f32 v5, v255, s2, s105
+// GFX12: encoding: [0x05,0x00,0x0d,0xd6,0xff,0x05,0xa4,0x01]
+
+v_cubesc_f32 v5, s1, v255, exec_hi
+// GFX12: encoding: [0x05,0x00,0x0d,0xd6,0x01,0xfe,0xff,0x01]
+
+v_cubesc_f32 v5, s105, s105, exec_lo
+// GFX12: encoding: [0x05,0x00,0x0d,0xd6,0x69,0xd2,0xf8,0x01]
+
+v_cubesc_f32 v5, vcc_lo, ttmp15, v3
+// GFX12: encoding: [0x05,0x00,0x0d,0xd6,0x6a,0xf6,0x0c,0x04]
+
+v_cubesc_f32 v5, vcc_hi, 0xaf123456, v255
+// GFX12: encoding: [0x05,0x00,0x0d,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf]
+
+v_cubesc_f32 v5, -|ttmp15|, -|src_scc|, -|ttmp15|
+// GFX12: encoding: [0x05,0x07,0x0d,0xd6,0x7b,0xfa,0xed,0xe1]
+
+v_cubesc_f32 v5, m0, 0.5, m0
+// GFX12: encoding: [0x05,0x00,0x0d,0xd6,0x7d,0xe0,0xf5,0x01]
+
+v_cubesc_f32 v5, |exec_lo|, -1, vcc_hi
+// GFX12: encoding: [0x05,0x01,0x0d,0xd6,0x7e,0x82,0xad,0x01]
+
+v_cubesc_f32 v5, -|exec_hi|, null, -|vcc_lo|
+// GFX12: encoding: [0x05,0x05,0x0d,0xd6,0x7f,0xf8,0xa8,0xa1]
+
+v_cubesc_f32 v5, null, exec_lo, -|0xaf123456|
+// GFX12: encoding: [0x05,0x04,0x0d,0xd6,0x7c,0xfc,0xfc,0x83,0x56,0x34,0x12,0xaf]
+
+v_cubesc_f32 v5, -1, -|exec_hi|, -|src_scc|
+// GFX12: encoding: [0x05,0x06,0x0d,0xd6,0xc1,0xfe,0xf4,0xc3]
+
+v_cubesc_f32 v5, 0.5, -m0, 0.5 mul:2
+// GFX12: encoding: [0x05,0x00,0x0d,0xd6,0xf0,0xfa,0xc0,0x4b]
+
+v_cubesc_f32 v5, -src_scc, |vcc_lo|, -1 mul:4
+// GFX12: encoding: [0x05,0x02,0x0d,0xd6,0xfd,0xd4,0x04,0x33]
+
+v_cubesc_f32 v255, -|0xaf123456|, -|vcc_hi|, null clamp div:2
+// GFX12: encoding: [0xff,0x83,0x0d,0xd6,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf]
+
+v_cubetc_f32 v5, v1, v2, s3
+// GFX12: encoding: [0x05,0x00,0x0e,0xd6,0x01,0x05,0x0e,0x00]
+
+v_cubetc_f32 v5, v255, s2, s105
+// GFX12: encoding: [0x05,0x00,0x0e,0xd6,0xff,0x05,0xa4,0x01]
+
+v_cubetc_f32 v5, s1, v255, exec_hi
+// GFX12: encoding: [0x05,0x00,0x0e,0xd6,0x01,0xfe,0xff,0x01]
+
+v_cubetc_f32 v5, s105, s105, exec_lo
+// GFX12: encoding: [0x05,0x00,0x0e,0xd6,0x69,0xd2,0xf8,0x01]
+
+v_cubetc_f32 v5, vcc_lo, ttmp15, v3
+// GFX12: encoding: [0x05,0x00,0x0e,0xd6,0x6a,0xf6,0x0c,0x04]
+
+v_cubetc_f32 v5, vcc_hi, 0xaf123456, v255
+// GFX12: encoding: [0x05,0x00,0x0e,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf]
+
+v_cubetc_f32 v5, -|ttmp15|, -|src_scc|, -|ttmp15|
+// GFX12: encoding: [0x05,0x07,0x0e,0xd6,0x7b,0xfa,0xed,0xe1]
+
+v_cubetc_f32 v5, m0, 0.5, m0
+// GFX12: encoding: [0x05,0x00,0x0e,0xd6,0x7d,0xe0,0xf5,0x01]
+
+v_cubetc_f32 v5, |exec_lo|, -1, vcc_hi
+// GFX12: encoding: [0x05,0x01,0x0e,0xd6,0x7e,0x82,0xad,0x01]
+
+v_cubetc_f32 v5, -|exec_hi|, null, -|vcc_lo|
+// GFX12: encoding: [0x05,0x05,0x0e,0xd6,0x7f,0xf8,0xa8,0xa1]
+
+v_cubetc_f32 v5, null, exec_lo, -|0xaf123456|
+// GFX12: encoding: [0x05,0x04,0x0e,0xd6,0x7c,0xfc,0xfc,0x83,0x56,0x34,0x12,0xaf]
+
+v_cubetc_f32 v5, -1, -|exec_hi|, -|src_scc|
+// GFX12: encoding: [0x05,0x06,0x0e,0xd6,0xc1,0xfe,0xf4,0xc3]
+
+v_cubetc_f32 v5, 0.5, -m0, 0.5 mul:2
+// GFX12: encoding: [0x05,0x00,0x0e,0xd6,0xf0,0xfa,0xc0,0x4b]
+
+v_cubetc_f32 v5, -src_scc, |vcc_lo|, -1 mul:4
+// GFX12: encoding: [0x05,0x02,0x0e,0xd6,0xfd,0xd4,0x04,0x33]
+
+v_cubetc_f32 v255, -|0xaf123456|, -|vcc_hi|, null clamp div:2
+// GFX12: encoding: [0xff,0x83,0x0e,0xd6,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf]
+
+v_cvt_pk_fp8_f32 v1, v2, v3
+// GFX12: encoding: [0x01,0x00,0x69,0xd7,0x02,0x07,0x02,0x00]
+
+v_cvt_pk_fp8_f32 v1, -v2, |v3|
+// GFX12: encoding: [0x01,0x02,0x69,0xd7,0x02,0x07,0x02,0x20]
+
+v_cvt_pk_fp8_f32 v1, s2, 3
+// GFX12: encoding: [0x01,0x00,0x69,0xd7,0x02,0x06,0x01,0x00]
+
+v_cvt_pk_bf8_f32 v1, v2, v3
+// GFX12: encoding: [0x01,0x00,0x6a,0xd7,0x02,0x07,0x02,0x00]
+
+v_cvt_pk_bf8_f32 v1, -v2, |v3|
+// GFX12: encoding: [0x01,0x02,0x6a,0xd7,0x02,0x07,0x02,0x20]
+
+v_cvt_pk_bf8_f32 v1, s2, 3
+// GFX12: encoding: [0x01,0x00,0x6a,0xd7,0x02,0x06,0x01,0x00]
+
+v_cvt_sr_fp8_f32 v1, v2, v3
+// GFX12: encoding: [0x01,0x00,0x6b,0xd7,0x02,0x07,0x02,0x00]
+
+v_cvt_sr_fp8_f32 v10, s2, v5
+// GFX12: encoding: [0x0a,0x00,0x6b,0xd7,0x02,0x0a,0x02,0x00]
+
+v_cvt_sr_fp8_f32 v5, -|v255|, v4
+// GFX12: encoding: [0x05,0x01,0x6b,0xd7,0xff,0x09,0x02,0x20]
+
+v_cvt_sr_fp8_f32 v1, v2, v3 byte_sel:0
+// GFX12: v_cvt_sr_fp8_f32 v1, v2, v3 ; encoding: [0x01,0x00,0x6b,0xd7,0x02,0x07,0x02,0x00]
+
+v_cvt_sr_fp8_f32 v1, v2, v3 byte_sel:1
+// GFX12: v_cvt_sr_fp8_f32 v1, v2, v3 byte_sel:1 ; encoding: [0x01,0x20,0x6b,0xd7,0x02,0x07,0x02,0x00]
+
+v_cvt_sr_fp8_f32 v1, v2, v3 byte_sel:2
+// GFX12: v_cvt_sr_fp8_f32 v1, v2, v3 byte_sel:2 ; encoding: [0x01,0x40,0x6b,0xd7,0x02,0x07,0x02,0x00]
+
+v_cvt_sr_fp8_f32 v1, v2, v3 byte_sel:3
+// GFX12: v_cvt_sr_fp8_f32 v1, v2, v3 byte_sel:3 ; encoding: [0x01,0x60,0x6b,0xd7,0x02,0x07,0x02,0x00]
+
+v_cvt_sr_bf8_f32 v1, v2, v3
+// GFX12: encoding: [0x01,0x00,0x6c,0xd7,0x02,0x07,0x02,0x00]
+
+v_cvt_sr_bf8_f32 v10, s2, v5
+// GFX12: encoding: [0x0a,0x00,0x6c,0xd7,0x02,0x0a,0x02,0x00]
+
+v_cvt_sr_bf8_f32 v5, -|v255|, v4
+// GFX12: encoding: [0x05,0x01,0x6c,0xd7,0xff,0x09,0x02,0x20]
+
+v_cvt_sr_bf8_f32 v1, v2, v3 byte_sel:0
+// GFX12: v_cvt_sr_bf8_f32 v1, v2, v3 ; encoding: [0x01,0x00,0x6c,0xd7,0x02,0x07,0x02,0x00]
+
+v_cvt_sr_bf8_f32 v1, v2, v3 byte_sel:1
+// GFX12: v_cvt_sr_bf8_f32 v1, v2, v3 byte_sel:1 ; encoding: [0x01,0x20,0x6c,0xd7,0x02,0x07,0x02,0x00]
+
+v_cvt_sr_bf8_f32 v1, v2, v3 byte_sel:2
+// GFX12: v_cvt_sr_bf8_f32 v1, v2, v3 byte_sel:2 ; encoding: [0x01,0x40,0x6c,0xd7,0x02,0x07,0x02,0x00]
+
+v_cvt_sr_bf8_f32 v1, v2, v3 byte_sel:3
+// GFX12: v_cvt_sr_bf8_f32 v1, v2, v3 byte_sel:3 ; encoding: [0x01,0x60,0x6c,0xd7,0x02,0x07,0x02,0x00]
+
+v_cvt_pk_i16_f32 v5, v1, v2
+// GFX12: encoding: [0x05,0x00,0x06,0xd7,0x01,0x05,0x02,0x00]
+
+v_cvt_pk_i16_f32 v5, v255, v255
+// GFX12: encoding: [0x05,0x00,0x06,0xd7,0xff,0xff,0x03,0x00]
+
+v_cvt_pk_i16_f32 v5, s1, s2
+// GFX12: encoding: [0x05,0x00,0x06,0xd7,0x01,0x04,0x00,0x00]
+
+v_cvt_pk_i16_f32 v5, s105, s105
+// GFX12: encoding: [0x05,0x00,0x06,0xd7,0x69,0xd2,0x00,0x00]
+
+v_cvt_pk_i16_f32 v5, vcc_lo, ttmp15
+// GFX12: encoding: [0x05,0x00,0x06,0xd7,0x6a,0xf6,0x00,0x00]
+
+v_cvt_pk_i16_f32 v5, vcc_hi, 0xaf123456
+// GFX12: encoding: [0x05,0x00,0x06,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+
+v_cvt_pk_i16_f32 v5, ttmp15, src_scc
+// GFX12: encoding: [0x05,0x00,0x06,0xd7,0x7b,0xfa,0x01,0x00]
+
+v_cvt_pk_i16_f32 v5, m0, 0.5
+// GFX12: encoding: [0x05,0x00,0x06,0xd7,0x7d,0xe0,0x01,0x00]
+
+v_cvt_pk_i16_f32 v5, exec_lo, -1
+// GFX12: encoding: [0x05,0x00,0x06,0xd7,0x7e,0x82,0x01,0x00]
+
+v_cvt_pk_i16_f32 v5, |exec_hi|, null
+// GFX12: encoding: [0x05,0x01,0x06,0xd7,0x7f,0xf8,0x00,0x00]
+
+v_cvt_pk_i16_f32 v5, null, exec_lo
+// GFX12: encoding: [0x05,0x00,0x06,0xd7,0x7c,0xfc,0x00,0x00]
+
+v_cvt_pk_i16_f32 v5, -1, exec_hi
+// GFX12: encoding: [0x05,0x00,0x06,0xd7,0xc1,0xfe,0x00,0x00]
+
+v_cvt_pk_i16_f32 v5, 0.5, -m0
+// GFX12: encoding: [0x05,0x00,0x06,0xd7,0xf0,0xfa,0x00,0x40]
+
+v_cvt_pk_i16_f32 v5, -src_scc, |vcc_lo|
+// GFX12: encoding: [0x05,0x02,0x06,0xd7,0xfd,0xd4,0x00,0x20]
+
+v_cvt_pk_i16_f32 v255, -|0xaf123456|, -|vcc_hi|
+// GFX12: encoding: [0xff,0x03,0x06,0xd7,0xff,0xd6,0x00,0x60,0x56,0x34,0x12,0xaf]
+
+v_cvt_pk_i16_i32 v5, v1, v2
+// GFX12: encoding: [0x05,0x00,0x24,0xd7,0x01,0x05,0x02,0x00]
+
+v_cvt_pk_i16_i32 v5, v255, v255
+// GFX12: encoding: [0x05,0x00,0x24,0xd7,0xff,0xff,0x03,0x00]
+
+v_cvt_pk_i16_i32 v5, s1, s2
+// GFX12: encoding: [0x05,0x00,0x24,0xd7,0x01,0x04,0x00,0x00]
+
+v_cvt_pk_i16_i32 v5, s105, s105
+// GFX12: encoding: [0x05,0x00,0x24,0xd7,0x69,0xd2,0x00,0x00]
+
+v_cvt_pk_i16_i32 v5, vcc_lo, ttmp15
+// GFX12: encoding: [0x05,0x00,0x24,0xd7,0x6a,0xf6,0x00,0x00]
+
+v_cvt_pk_i16_i32 v5, vcc_hi, 0xaf123456
+// GFX12: encoding: [0x05,0x00,0x24,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+
+v_cvt_pk_i16_i32 v5, ttmp15, src_scc
+// GFX12: encoding: [0x05,0x00,0x24,0xd7,0x7b,0xfa,0x01,0x00]
+
+v_cvt_pk_i16_i32 v5, m0, 0.5
+// GFX12: encoding: [0x05,0x00,0x24,0xd7,0x7d,0xe0,0x01,0x00]
+
+v_cvt_pk_i16_i32 v5, exec_lo, -1
+// GFX12: encoding: [0x05,0x00,0x24,0xd7,0x7e,0x82,0x01,0x00]
+
+v_cvt_pk_i16_i32 v5, exec_hi, null
+// GFX12: encoding: [0x05,0x00,0x24,0xd7,0x7f,0xf8,0x00,0x00]
+
+v_cvt_pk_i16_i32 v5, null, exec_lo
+// GFX12: encoding: [0x05,0x00,0x24,0xd7,0x7c,0xfc,0x00,0x00]
+
+v_cvt_pk_i16_i32 v5, -1, exec_hi
+// GFX12: encoding: [0x05,0x00,0x24,0xd7,0xc1,0xfe,0x00,0x00]
+
+v_cvt_pk_i16_i32 v5, 0.5, m0
+// GFX12: encoding: [0x05,0x00,0x24,0xd7,0xf0,0xfa,0x00,0x00]
+
+v_cvt_pk_i16_i32 v5, src_scc, vcc_lo
+// GFX12: encoding: [0x05,0x00,0x24,0xd7,0xfd,0xd4,0x00,0x00]
+
+v_cvt_pk_i16_i32 v255, 0xaf123456, vcc_hi
+// GFX12: encoding: [0xff,0x00,0x24,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf]
+
+v_cvt_pk_norm_i16_f16 v5, v1, v2
+// GFX12: encoding: [0x05,0x00,0x12,0xd7,0x01,0x05,0x02,0x00]
+
+v_cvt_pk_norm_i16_f16 v5, v255, v255
+// GFX12: encoding: [0x05,0x00,0x12,0xd7,0xff,0xff,0x03,0x00]
+
+v_cvt_pk_norm_i16_f16 v5, s1, s2
+// GFX12: encoding: [0x05,0x00,0x12,0xd7,0x01,0x04,0x00,0x00]
+
+v_cvt_pk_norm_i16_f16 v5, s105, s105
+// GFX12: encoding: [0x05,0x00,0x12,0xd7,0x69,0xd2,0x00,0x00]
+
+v_cvt_pk_norm_i16_f16 v5, vcc_lo, ttmp15
+// GFX12: encoding: [0x05,0x00,0x12,0xd7,0x6a,0xf6,0x00,0x00]
+
+v_cvt_pk_norm_i16_f16 v5, vcc_hi, 0xfe0b
+// GFX12: encoding: [0x05,0x00,0x12,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+
+v_cvt_pk_norm_i16_f16 v5, ttmp15, src_scc
+// GFX12: encoding: [0x05,0x00,0x12,0xd7,0x7b,0xfa,0x01,0x00]
+
+v_cvt_pk_norm_i16_f16 v5, m0, 0.5
+// GFX12: encoding: [0x05,0x00,0x12,0xd7,0x7d,0xe0,0x01,0x00]
+
+v_cvt_pk_norm_i16_f16 v5, exec_lo, -1
+// GFX12: encoding: [0x05,0x00,0x12,0xd7,0x7e,0x82,0x01,0x00]
+
+v_cvt_pk_norm_i16_f16 v5, |exec_hi|, null
+// GFX12: encoding: [0x05,0x01,0x12,0xd7,0x7f,0xf8,0x00,0x00]
+
+v_cvt_pk_norm_i16_f16 v5, null, exec_lo
+// GFX12: encoding: [0x05,0x00,0x12,0xd7,0x7c,0xfc,0x00,0x00]
+
+v_cvt_pk_norm_i16_f16 v5, -1, exec_hi
+// GFX12: encoding: [0x05,0x00,0x12,0xd7,0xc1,0xfe,0x00,0x00]
+
+v_cvt_pk_norm_i16_f16 v5, 0.5, -m0 op_sel:[0,0,0]
+// GFX12: encoding: [0x05,0x00,0x12,0xd7,0xf0,0xfa,0x00,0x40]
+
+v_cvt_pk_norm_i16_f16 v5, -src_scc, |vcc_lo| op_sel:[1,0,0]
+// GFX12: encoding: [0x05,0x0a,0x12,0xd7,0xfd,0xd4,0x00,0x20]
+
+v_cvt_pk_norm_i16_f16 v255, -|0xfe0b|, -|vcc_hi| op_sel:[0,1,0]
+// GFX12: encoding: [0xff,0x13,0x12,0xd7,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00]
+
+v_cvt_pk_norm_u16_f16 v5, v1, v2
+// GFX12: encoding: [0x05,0x00,0x13,0xd7,0x01,0x05,0x02,0x00]
+
+v_cvt_pk_norm_u16_f16 v5, v255, v255
+// GFX12: encoding: [0x05,0x00,0x13,0xd7,0xff,0xff,0x03,0x00]
+
+v_cvt_pk_norm_u16_f16 v5, s1, s2
+// GFX12: encoding: [0x05,0x00,0x13,0xd7,0x01,0x04,0x00,0x00]
+
+v_cvt_pk_norm_u16_f16 v5, s105, s105
+// GFX12: encoding: [0x05,0x00,0x13,0xd7,0x69,0xd2,0x00,0x00]
+
+v_cvt_pk_norm_u16_f16 v5, vcc_lo, ttmp15
+// GFX12: encoding: [0x05,0x00,0x13,0xd7,0x6a,0xf6,0x00,0x00]
+
+v_cvt_pk_norm_u16_f16 v5, vcc_hi, 0xfe0b
+// GFX12: encoding: [0x05,0x00,0x13,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+
+v_cvt_pk_norm_u16_f16 v5, ttmp15, src_scc
+// GFX12: encoding: [0x05,0x00,0x13,0xd7,0x7b,0xfa,0x01,0x00]
+
+v_cvt_pk_norm_u16_f16 v5, m0, 0.5
+// GFX12: encoding: [0x05,0x00,0x13,0xd7,0x7d,0xe0,0x01,0x00]
+
+v_cvt_pk_norm_u16_f16 v5, exec_lo, -1
+// GFX12: encoding: [0x05,0x00,0x13,0xd7,0x7e,0x82,0x01,0x00]
+
+v_cvt_pk_norm_u16_f16 v5, |exec_hi|, null
+// GFX12: encoding: [0x05,0x01,0x13,0xd7,0x7f,0xf8,0x00,0x00]
+
+v_cvt_pk_norm_u16_f16 v5, null, exec_lo
+// GFX12: encoding: [0x05,0x00,0x13,0xd7,0x7c,0xfc,0x00,0x00]
+
+v_cvt_pk_norm_u16_f16 v5, -1, exec_hi
+// GFX12: encoding: [0x05,0x00,0x13,0xd7,0xc1,0xfe,0x00,0x00]
+
+v_cvt_pk_norm_u16_f16 v5, 0.5, -m0 op_sel:[0,0,0]
+// GFX12: encoding: [0x05,0x00,0x13,0xd7,0xf0,0xfa,0x00,0x40]
+
+v_cvt_pk_norm_u16_f16 v5, -src_scc, |vcc_lo| op_sel:[1,0,0]
+// GFX12: encoding: [0x05,0x0a,0x13,0xd7,0xfd,0xd4,0x00,0x20]
+
+v_cvt_pk_norm_u16_f16 v255, -|0xfe0b|, -|vcc_hi| op_sel:[0,1,0]
+// GFX12: encoding: [0xff,0x13,0x13,0xd7,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00]
+
+v_cvt_pk_u16_f32 v5, v1, v2
+// GFX12: encoding: [0x05,0x00,0x07,0xd7,0x01,0x05,0x02,0x00]
+
+v_cvt_pk_u16_f32 v5, v255, v255
+// GFX12: encoding: [0x05,0x00,0x07,0xd7,0xff,0xff,0x03,0x00]
+
+v_cvt_pk_u16_f32 v5, s1, s2
+// GFX12: encoding: [0x05,0x00,0x07,0xd7,0x01,0x04,0x00,0x00]
+
+v_cvt_pk_u16_f32 v5, s105, s105
+// GFX12: encoding: [0x05,0x00,0x07,0xd7,0x69,0xd2,0x00,0x00]
+
+v_cvt_pk_u16_f32 v5, vcc_lo, ttmp15
+// GFX12: encoding: [0x05,0x00,0x07,0xd7,0x6a,0xf6,0x00,0x00]
+
+v_cvt_pk_u16_f32 v5, vcc_hi, 0xaf123456
+// GFX12: encoding: [0x05,0x00,0x07,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+
+v_cvt_pk_u16_f32 v5, ttmp15, src_scc
+// GFX12: encoding: [0x05,0x00,0x07,0xd7,0x7b,0xfa,0x01,0x00]
+
+v_cvt_pk_u16_f32 v5, m0, 0.5
+// GFX12: encoding: [0x05,0x00,0x07,0xd7,0x7d,0xe0,0x01,0x00]
+
+v_cvt_pk_u16_f32 v5, exec_lo, -1
+// GFX12: encoding: [0x05,0x00,0x07,0xd7,0x7e,0x82,0x01,0x00]
+
+v_cvt_pk_u16_f32 v5, |exec_hi|, null
+// GFX12: encoding: [0x05,0x01,0x07,0xd7,0x7f,0xf8,0x00,0x00]
+
+v_cvt_pk_u16_f32 v5, null, exec_lo
+// GFX12: encoding: [0x05,0x00,0x07,0xd7,0x7c,0xfc,0x00,0x00]
+
+v_cvt_pk_u16_f32 v5, -1, exec_hi
+// GFX12: encoding: [0x05,0x00,0x07,0xd7,0xc1,0xfe,0x00,0x00]
+
+v_cvt_pk_u16_f32 v5, 0.5, -m0
+// GFX12: encoding: [0x05,0x00,0x07,0xd7,0xf0,0xfa,0x00,0x40]
+
+v_cvt_pk_u16_f32 v5, -src_scc, |vcc_lo|
+// GFX12: encoding: [0x05,0x02,0x07,0xd7,0xfd,0xd4,0x00,0x20]
+
+v_cvt_pk_u16_f32 v255, -|0xaf123456|, -|vcc_hi|
+// GFX12: encoding: [0xff,0x03,0x07,0xd7,0xff,0xd6,0x00,0x60,0x56,0x34,0x12,0xaf]
+
+v_cvt_pk_u16_u32 v5, v1, v2
+// GFX12: encoding: [0x05,0x00,0x23,0xd7,0x01,0x05,0x02,0x00]
+
+v_cvt_pk_u16_u32 v5, v255, v255
+// GFX12: encoding: [0x05,0x00,0x23,0xd7,0xff,0xff,0x03,0x00]
+
+v_cvt_pk_u16_u32 v5, s1, s2
+// GFX12: encoding: [0x05,0x00,0x23,0xd7,0x01,0x04,0x00,0x00]
+
+v_cvt_pk_u16_u32 v5, s105, s105
+// GFX12: encoding: [0x05,0x00,0x23,0xd7,0x69,0xd2,0x00,0x00]
+
+v_cvt_pk_u16_u32 v5, vcc_lo, ttmp15
+// GFX12: encoding: [0x05,0x00,0x23,0xd7,0x6a,0xf6,0x00,0x00]
+
+v_cvt_pk_u16_u32 v5, vcc_hi, 0xaf123456
+// GFX12: encoding: [0x05,0x00,0x23,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+
+v_cvt_pk_u16_u32 v5, ttmp15, src_scc
+// GFX12: encoding: [0x05,0x00,0x23,0xd7,0x7b,0xfa,0x01,0x00]
+
+v_cvt_pk_u16_u32 v5, m0, 0.5
+// GFX12: encoding: [0x05,0x00,0x23,0xd7,0x7d,0xe0,0x01,0x00]
+
+v_cvt_pk_u16_u32 v5, exec_lo, -1
+// GFX12: encoding: [0x05,0x00,0x23,0xd7,0x7e,0x82,0x01,0x00]
+
+v_cvt_pk_u16_u32 v5, exec_hi, null
+// GFX12: encoding: [0x05,0x00,0x23,0xd7,0x7f,0xf8,0x00,0x00]
+
+v_cvt_pk_u16_u32 v5, null, exec_lo
+// GFX12: encoding: [0x05,0x00,0x23,0xd7,0x7c,0xfc,0x00,0x00]
+
+v_cvt_pk_u16_u32 v5, -1, exec_hi
+// GFX12: encoding: [0x05,0x00,0x23,0xd7,0xc1,0xfe,0x00,0x00]
+
+v_cvt_pk_u16_u32 v5, 0.5, m0
+// GFX12: encoding: [0x05,0x00,0x23,0xd7,0xf0,0xfa,0x00,0x00]
+
+v_cvt_pk_u16_u32 v5, src_scc, vcc_lo
+// GFX12: encoding: [0x05,0x00,0x23,0xd7,0xfd,0xd4,0x00,0x00]
+
+v_cvt_pk_u16_u32 v255, 0xaf123456, vcc_hi
+// GFX12: encoding: [0xff,0x00,0x23,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf]
+
+v_cvt_pk_u8_f32 v5, v1, v2, s3
+// GFX12: encoding: [0x05,0x00,0x26,0xd6,0x01,0x05,0x0e,0x00]
+
+v_cvt_pk_u8_f32 v5, v255, s2, s105
+// GFX12: encoding: [0x05,0x00,0x26,0xd6,0xff,0x05,0xa4,0x01]
+
+v_cvt_pk_u8_f32 v5, s1, v255, exec_hi
+// GFX12: encoding: [0x05,0x00,0x26,0xd6,0x01,0xfe,0xff,0x01]
+
+v_cvt_pk_u8_f32 v5, s105, s105, exec_lo
+// GFX12: encoding: [0x05,0x00,0x26,0xd6,0x69,0xd2,0xf8,0x01]
+
+v_cvt_pk_u8_f32 v5, vcc_lo, ttmp15, v3
+// GFX12: encoding: [0x05,0x00,0x26,0xd6,0x6a,0xf6,0x0c,0x04]
+
+v_cvt_pk_u8_f32 v5, vcc_hi, 0xaf123456, v255
+// GFX12: encoding: [0x05,0x00,0x26,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf]
+
+v_cvt_pk_u8_f32 v5, ttmp15, src_scc, ttmp15
+// GFX12: encoding: [0x05,0x00,0x26,0xd6,0x7b,0xfa,0xed,0x01]
+
+v_cvt_pk_u8_f32 v5, m0, 0.5, m0
+// GFX12: encoding: [0x05,0x00,0x26,0xd6,0x7d,0xe0,0xf5,0x01]
+
+v_cvt_pk_u8_f32 v5, exec_lo, -1, vcc_hi
+// GFX12: encoding: [0x05,0x00,0x26,0xd6,0x7e,0x82,0xad,0x01]
+
+v_cvt_pk_u8_f32 v5, exec_hi, null, vcc_lo
+// GFX12: encoding: [0x05,0x00,0x26,0xd6,0x7f,0xf8,0xa8,0x01]
+
+v_cvt_pk_u8_f32 v5, null, exec_lo, 0xaf123456
+// GFX12: encoding: [0x05,0x00,0x26,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf]
+
+v_cvt_pk_u8_f32 v5, -1, exec_hi, src_scc
+// GFX12: encoding: [0x05,0x00,0x26,0xd6,0xc1,0xfe,0xf4,0x03]
+
+v_cvt_pk_u8_f32 v5, 0.5, m0, 0.5
+// GFX12: encoding: [0x05,0x00,0x26,0xd6,0xf0,0xfa,0xc0,0x03]
+
+v_cvt_pk_u8_f32 v5, src_scc, vcc_lo, -1
+// GFX12: encoding: [0x05,0x00,0x26,0xd6,0xfd,0xd4,0x04,0x03]
+
+v_cvt_pk_u8_f32 v255, -|0xaf123456|, vcc_hi, null
+// GFX12: encoding: [0xff,0x01,0x26,0xd6,0xff,0xd6,0xf0,0x21,0x56,0x34,0x12,0xaf]
+
+v_cvt_pk_norm_i16_f16 v5, v1, v2
+// GFX12: encoding: [0x05,0x00,0x12,0xd7,0x01,0x05,0x02,0x00]
+
+v_cvt_pk_norm_i16_f16 v5, v255, v255
+// GFX12: encoding: [0x05,0x00,0x12,0xd7,0xff,0xff,0x03,0x00]
+
+v_cvt_pk_norm_i16_f16 v5, s1, s2
+// GFX12: encoding: [0x05,0x00,0x12,0xd7,0x01,0x04,0x00,0x00]
+
+v_cvt_pk_norm_i16_f16 v5, s105, s105
+// GFX12: encoding: [0x05,0x00,0x12,0xd7,0x69,0xd2,0x00,0x00]
+
+v_cvt_pk_norm_i16_f16 v5, vcc_lo, ttmp15
+// GFX12: encoding: [0x05,0x00,0x12,0xd7,0x6a,0xf6,0x00,0x00]
+
+v_cvt_pk_norm_i16_f16 v5, vcc_hi, 0xfe0b
+// GFX12: encoding: [0x05,0x00,0x12,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+
+v_cvt_pk_norm_i16_f16 v5, ttmp15, src_scc
+// GFX12: encoding: [0x05,0x00,0x12,0xd7,0x7b,0xfa,0x01,0x00]
+
+v_cvt_pk_norm_i16_f16 v5, m0, 0.5
+// GFX12: encoding: [0x05,0x00,0x12,0xd7,0x7d,0xe0,0x01,0x00]
+
+v_cvt_pk_norm_i16_f16 v5, exec_lo, -1
+// GFX12: encoding: [0x05,0x00,0x12,0xd7,0x7e,0x82,0x01,0x00]
+
+v_cvt_pk_norm_i16_f16 v5, |exec_hi|, null
+// GFX12: encoding: [0x05,0x01,0x12,0xd7,0x7f,0xf8,0x00,0x00]
+
+v_cvt_pk_norm_i16_f16 v5, null, exec_lo
+// GFX12: encoding: [0x05,0x00,0x12,0xd7,0x7c,0xfc,0x00,0x00]
+
+v_cvt_pk_norm_i16_f16 v5, -1, exec_hi
+// GFX12: encoding: [0x05,0x00,0x12,0xd7,0xc1,0xfe,0x00,0x00]
+
+v_cvt_pk_norm_i16_f16 v5, 0.5, -m0 op_sel:[0,0,0]
+// GFX12: encoding: [0x05,0x00,0x12,0xd7,0xf0,0xfa,0x00,0x40]
+
+v_cvt_pk_norm_i16_f16 v5, -src_scc, |vcc_lo| op_sel:[1,0,0]
+// GFX12: encoding: [0x05,0x0a,0x12,0xd7,0xfd,0xd4,0x00,0x20]
+
+v_cvt_pk_norm_i16_f16 v255, -|0xfe0b|, -|vcc_hi| op_sel:[0,1,0]
+// GFX12: encoding: [0xff,0x13,0x12,0xd7,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00]
+
+v_cvt_pk_norm_i16_f32 v5, v1, v2
+// GFX12: encoding: [0x05,0x00,0x21,0xd7,0x01,0x05,0x02,0x00]
+
+v_cvt_pk_norm_i16_f32 v5, v255, v255
+// GFX12: encoding: [0x05,0x00,0x21,0xd7,0xff,0xff,0x03,0x00]
+
+v_cvt_pk_norm_i16_f32 v5, s1, s2
+// GFX12: encoding: [0x05,0x00,0x21,0xd7,0x01,0x04,0x00,0x00]
+
+v_cvt_pk_norm_i16_f32 v5, s105, s105
+// GFX12: encoding: [0x05,0x00,0x21,0xd7,0x69,0xd2,0x00,0x00]
+
+v_cvt_pk_norm_i16_f32 v5, vcc_lo, ttmp15
+// GFX12: encoding: [0x05,0x00,0x21,0xd7,0x6a,0xf6,0x00,0x00]
+
+v_cvt_pk_norm_i16_f32 v5, vcc_hi, 0xaf123456
+// GFX12: encoding: [0x05,0x00,0x21,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+
+v_cvt_pk_norm_i16_f32 v5, ttmp15, src_scc
+// GFX12: encoding: [0x05,0x00,0x21,0xd7,0x7b,0xfa,0x01,0x00]
+
+v_cvt_pk_norm_i16_f32 v5, m0, 0.5
+// GFX12: encoding: [0x05,0x00,0x21,0xd7,0x7d,0xe0,0x01,0x00]
+
+v_cvt_pk_norm_i16_f32 v5, exec_lo, -1
+// GFX12: encoding: [0x05,0x00,0x21,0xd7,0x7e,0x82,0x01,0x00]
+
+v_cvt_pk_norm_i16_f32 v5, |exec_hi|, null
+// GFX12: encoding: [0x05,0x01,0x21,0xd7,0x7f,0xf8,0x00,0x00]
+
+v_cvt_pk_norm_i16_f32 v5, null, exec_lo
+// GFX12: encoding: [0x05,0x00,0x21,0xd7,0x7c,0xfc,0x00,0x00]
+
+v_cvt_pk_norm_i16_f32 v5, -1, exec_hi
+// GFX12: encoding: [0x05,0x00,0x21,0xd7,0xc1,0xfe,0x00,0x00]
+
+v_cvt_pk_norm_i16_f32 v5, 0.5, -m0
+// GFX12: encoding: [0x05,0x00,0x21,0xd7,0xf0,0xfa,0x00,0x40]
+
+v_cvt_pk_norm_i16_f32 v5, -src_scc, |vcc_lo|
+// GFX12: encoding: [0x05,0x02,0x21,0xd7,0xfd,0xd4,0x00,0x20]
+
+v_cvt_pk_norm_i16_f32 v255, -|0xaf123456|, -|vcc_hi|
+// GFX12: encoding: [0xff,0x03,0x21,0xd7,0xff,0xd6,0x00,0x60,0x56,0x34,0x12,0xaf]
+
+v_cvt_pk_norm_u16_f16 v5, v1, v2
+// GFX12: encoding: [0x05,0x00,0x13,0xd7,0x01,0x05,0x02,0x00]
+
+v_cvt_pk_norm_u16_f16 v5, v255, v255
+// GFX12: encoding: [0x05,0x00,0x13,0xd7,0xff,0xff,0x03,0x00]
+
+v_cvt_pk_norm_u16_f16 v5, s1, s2
+// GFX12: encoding: [0x05,0x00,0x13,0xd7,0x01,0x04,0x00,0x00]
+
+v_cvt_pk_norm_u16_f16 v5, s105, s105
+// GFX12: encoding: [0x05,0x00,0x13,0xd7,0x69,0xd2,0x00,0x00]
+
+v_cvt_pk_norm_u16_f16 v5, vcc_lo, ttmp15
+// GFX12: encoding: [0x05,0x00,0x13,0xd7,0x6a,0xf6,0x00,0x00]
+
+v_cvt_pk_norm_u16_f16 v5, vcc_hi, 0xfe0b
+// GFX12: encoding: [0x05,0x00,0x13,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+
+v_cvt_pk_norm_u16_f16 v5, ttmp15, src_scc
+// GFX12: encoding: [0x05,0x00,0x13,0xd7,0x7b,0xfa,0x01,0x00]
+
+v_cvt_pk_norm_u16_f16 v5, m0, 0.5
+// GFX12: encoding: [0x05,0x00,0x13,0xd7,0x7d,0xe0,0x01,0x00]
+
+v_cvt_pk_norm_u16_f16 v5, exec_lo, -1
+// GFX12: encoding: [0x05,0x00,0x13,0xd7,0x7e,0x82,0x01,0x00]
+
+v_cvt_pk_norm_u16_f16 v5, |exec_hi|, null
+// GFX12: encoding: [0x05,0x01,0x13,0xd7,0x7f,0xf8,0x00,0x00]
+
+v_cvt_pk_norm_u16_f16 v5, null, exec_lo
+// GFX12: encoding: [0x05,0x00,0x13,0xd7,0x7c,0xfc,0x00,0x00]
+
+v_cvt_pk_norm_u16_f16 v5, -1, exec_hi
+// GFX12: encoding: [0x05,0x00,0x13,0xd7,0xc1,0xfe,0x00,0x00]
+
+v_cvt_pk_norm_u16_f16 v5, 0.5, -m0 op_sel:[0,0,0]
+// GFX12: encoding: [0x05,0x00,0x13,0xd7,0xf0,0xfa,0x00,0x40]
+
+v_cvt_pk_norm_u16_f16 v5, -src_scc, |vcc_lo| op_sel:[1,0,0]
+// GFX12: encoding: [0x05,0x0a,0x13,0xd7,0xfd,0xd4,0x00,0x20]
+
+v_cvt_pk_norm_u16_f16 v255, -|0xfe0b|, -|vcc_hi| op_sel:[0,1,0]
+// GFX12: encoding: [0xff,0x13,0x13,0xd7,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00]
+
+v_cvt_pk_norm_u16_f32 v5, v1, v2
+// GFX12: encoding: [0x05,0x00,0x22,0xd7,0x01,0x05,0x02,0x00]
+
+v_cvt_pk_norm_u16_f32 v5, v255, v255
+// GFX12: encoding: [0x05,0x00,0x22,0xd7,0xff,0xff,0x03,0x00]
+
+v_cvt_pk_norm_u16_f32 v5, s1, s2
+// GFX12: encoding: [0x05,0x00,0x22,0xd7,0x01,0x04,0x00,0x00]
+
+v_cvt_pk_norm_u16_f32 v5, s105, s105
+// GFX12: encoding: [0x05,0x00,0x22,0xd7,0x69,0xd2,0x00,0x00]
+
+v_cvt_pk_norm_u16_f32 v5, vcc_lo, ttmp15
+// GFX12: encoding: [0x05,0x00,0x22,0xd7,0x6a,0xf6,0x00,0x00]
+
+v_cvt_pk_norm_u16_f32 v5, vcc_hi, 0xaf123456
+// GFX12: encoding: [0x05,0x00,0x22,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+
+v_cvt_pk_norm_u16_f32 v5, ttmp15, src_scc
+// GFX12: encoding: [0x05,0x00,0x22,0xd7,0x7b,0xfa,0x01,0x00]
+
+v_cvt_pk_norm_u16_f32 v5, m0, 0.5
+// GFX12: encoding: [0x05,0x00,0x22,0xd7,0x7d,0xe0,0x01,0x00]
+
+v_cvt_pk_norm_u16_f32 v5, exec_lo, -1
+// GFX12: encoding: [0x05,0x00,0x22,0xd7,0x7e,0x82,0x01,0x00]
+
+v_cvt_pk_norm_u16_f32 v5, |exec_hi|, null
+// GFX12: encoding: [0x05,0x01,0x22,0xd7,0x7f,0xf8,0x00,0x00]
+
+v_cvt_pk_norm_u16_f32 v5, null, exec_lo
+// GFX12: encoding: [0x05,0x00,0x22,0xd7,0x7c,0xfc,0x00,0x00]
+
+v_cvt_pk_norm_u16_f32 v5, -1, exec_hi
+// GFX12: encoding: [0x05,0x00,0x22,0xd7,0xc1,0xfe,0x00,0x00]
+
+v_cvt_pk_norm_u16_f32 v5, 0.5, -m0
+// GFX12: encoding: [0x05,0x00,0x22,0xd7,0xf0,0xfa,0x00,0x40]
+
+v_cvt_pk_norm_u16_f32 v5, -src_scc, |vcc_lo|
+// GFX12: encoding: [0x05,0x02,0x22,0xd7,0xfd,0xd4,0x00,0x20]
+
+v_cvt_pk_norm_u16_f32 v255, -|0xaf123456|, -|vcc_hi|
+// GFX12: encoding: [0xff,0x03,0x22,0xd7,0xff,0xd6,0x00,0x60,0x56,0x34,0x12,0xaf]
+
+v_div_fixup_f16 v5, v1, v2, s3
+// GFX12: encoding: [0x05,0x00,0x54,0xd6,0x01,0x05,0x0e,0x00]
+
+v_div_fixup_f16 v5, v255, s2, s105
+// GFX12: encoding: [0x05,0x00,0x54,0xd6,0xff,0x05,0xa4,0x01]
+
+v_div_fixup_f16 v5, s1, v255, exec_hi
+// GFX12: encoding: [0x05,0x00,0x54,0xd6,0x01,0xfe,0xff,0x01]
+
+v_div_fixup_f16 v5, s105, s105, exec_lo
+// GFX12: encoding: [0x05,0x00,0x54,0xd6,0x69,0xd2,0xf8,0x01]
+
+v_div_fixup_f16 v5, vcc_lo, ttmp15, v3
+// GFX12: encoding: [0x05,0x00,0x54,0xd6,0x6a,0xf6,0x0c,0x04]
+
+v_div_fixup_f16 v5, vcc_hi, 0xfe0b, v255
+// GFX12: encoding: [0x05,0x00,0x54,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00]
+
+v_div_fixup_f16 v5, -|ttmp15|, -|src_scc|, -|ttmp15|
+// GFX12: encoding: [0x05,0x07,0x54,0xd6,0x7b,0xfa,0xed,0xe1]
+
+v_div_fixup_f16 v5, m0, 0.5, m0
+// GFX12: encoding: [0x05,0x00,0x54,0xd6,0x7d,0xe0,0xf5,0x01]
+
+v_div_fixup_f16 v5, |exec_lo|, -1, vcc_hi
+// GFX12: encoding: [0x05,0x01,0x54,0xd6,0x7e,0x82,0xad,0x01]
+
+v_div_fixup_f16 v5, -|exec_hi|, null, -|vcc_lo| op_sel:[1,1,1,1]
+// GFX12: encoding: [0x05,0x7d,0x54,0xd6,0x7f,0xf8,0xa8,0xa1]
+
+v_div_fixup_f16 v5, null, exec_lo, -|0xfe0b| op_sel:[0,0,0,0]
+// GFX12: encoding: [0x05,0x04,0x54,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00]
+
+v_div_fixup_f16 v5, -1, -|exec_hi|, -|src_scc| op_sel:[1,0,0,0]
+// GFX12: encoding: [0x05,0x0e,0x54,0xd6,0xc1,0xfe,0xf4,0xc3]
+
+v_div_fixup_f16 v5, 0.5, -m0, 0.5 op_sel:[0,1,0,0]
+// GFX12: encoding: [0x05,0x10,0x54,0xd6,0xf0,0xfa,0xc0,0x43]
+
+v_div_fixup_f16 v5, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0]
+// GFX12: encoding: [0x05,0x22,0x54,0xd6,0xfd,0xd4,0x04,0x23]
+
+v_div_fixup_f16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp
+// GFX12: encoding: [0xff,0xc3,0x54,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00]
+
+v_div_fixup_f32 v5, v1, v2, s3
+// GFX12: encoding: [0x05,0x00,0x27,0xd6,0x01,0x05,0x0e,0x00]
+
+v_div_fixup_f32 v5, v255, s2, s105
+// GFX12: encoding: [0x05,0x00,0x27,0xd6,0xff,0x05,0xa4,0x01]
+
+v_div_fixup_f32 v5, s1, v255, exec_hi
+// GFX12: encoding: [0x05,0x00,0x27,0xd6,0x01,0xfe,0xff,0x01]
+
+v_div_fixup_f32 v5, s105, s105, exec_lo
+// GFX12: encoding: [0x05,0x00,0x27,0xd6,0x69,0xd2,0xf8,0x01]
+
+v_div_fixup_f32 v5, vcc_lo, ttmp15, v3
+// GFX12: encoding: [0x05,0x00,0x27,0xd6,0x6a,0xf6,0x0c,0x04]
+
+v_div_fixup_f32 v5, vcc_hi, 0xaf123456, v255
+// GFX12: encoding: [0x05,0x00,0x27,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf]
+
+v_div_fixup_f32 v5, -|ttmp15|, -|src_scc|, -|ttmp15|
+// GFX12: encoding: [0x05,0x07,0x27,0xd6,0x7b,0xfa,0xed,0xe1]
+
+v_div_fixup_f32 v5, m0, 0.5, m0
+// GFX12: encoding: [0x05,0x00,0x27,0xd6,0x7d,0xe0,0xf5,0x01]
+
+v_div_fixup_f32 v5, |exec_lo|, -1, vcc_hi
+// GFX12: encoding: [0x05,0x01,0x27,0xd6,0x7e,0x82,0xad,0x01]
+
+v_div_fixup_f32 v5, -|exec_hi|, null, -|vcc_lo|
+// GFX12: encoding: [0x05,0x05,0x27,0xd6,0x7f,0xf8,0xa8,0xa1]
+
+v_div_fixup_f32 v5, null, exec_lo, -|0xaf123456|
+// GFX12: encoding: [0x05,0x04,0x27,0xd6,0x7c,0xfc,0xfc,0x83,0x56,0x34,0x12,0xaf]
+
+v_div_fixup_f32 v5, -1, -|exec_hi|, -|src_scc|
+// GFX12: encoding: [0x05,0x06,0x27,0xd6,0xc1,0xfe,0xf4,0xc3]
+
+v_div_fixup_f32 v5, 0.5, -m0, 0.5 mul:2
+// GFX12: encoding: [0x05,0x00,0x27,0xd6,0xf0,0xfa,0xc0,0x4b]
+
+v_div_fixup_f32 v5, -src_scc, |vcc_lo|, -1 mul:4
+// GFX12: encoding: [0x05,0x02,0x27,0xd6,0xfd,0xd4,0x04,0x33]
+
+v_div_fixup_f32 v255, -|0xaf123456|, -|vcc_hi|, null clamp div:2
+// GFX12: encoding: [0xff,0x83,0x27,0xd6,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf]
+
+v_div_fixup_f64 v[5:6], v[1:2], v[2:3], v[3:4]
+// GFX12: encoding: [0x05,0x00,0x28,0xd6,0x01,0x05,0x0e,0x04]
+
+v_div_fixup_f64 v[5:6], v[254:255], v[254:255], s[6:7]
+// GFX12: encoding: [0x05,0x00,0x28,0xd6,0xfe,0xfd,0x1b,0x00]
+
+v_div_fixup_f64 v[5:6], s[2:3], s[4:5], v[254:255]
+// GFX12: encoding: [0x05,0x00,0x28,0xd6,0x02,0x08,0xf8,0x07]
+
+v_div_fixup_f64 v[5:6], -|s[104:105]|, s[104:105], -|s[104:105]|
+// GFX12: encoding: [0x05,0x05,0x28,0xd6,0x68,0xd0,0xa0,0xa1]
+
+v_div_fixup_f64 v[5:6], vcc, -|ttmp[14:15]|, -|ttmp[14:15]|
+// GFX12: encoding: [0x05,0x06,0x28,0xd6,0x6a,0xf4,0xe8,0xc1]
+
+v_div_fixup_f64 v[5:6], -|ttmp[14:15]|, 0xaf123456, null
+// GFX12: encoding: [0x05,0x01,0x28,0xd6,0x7a,0xfe,0xf1,0x21,0x56,0x34,0x12,0xaf]
+
+v_div_fixup_f64 v[5:6], -|exec|, -|src_scc|, -|exec|
+// GFX12: encoding: [0x05,0x07,0x28,0xd6,0x7e,0xfa,0xf9,0xe1]
+
+v_div_fixup_f64 v[5:6], null, 0.5, vcc
+// GFX12: encoding: [0x05,0x00,0x28,0xd6,0x7c,0xe0,0xa9,0x01]
+
+v_div_fixup_f64 v[5:6], -1, -1, 0xaf123456
+// GFX12: encoding: [0x05,0x00,0x28,0xd6,0xc1,0x82,0xfd,0x03,0x56,0x34,0x12,0xaf]
+
+v_div_fixup_f64 v[5:6], 0.5, null, -|src_scc| mul:2
+// GFX12: encoding: [0x05,0x04,0x28,0xd6,0xf0,0xf8,0xf4,0x8b]
+
+v_div_fixup_f64 v[5:6], -|src_scc|, -|exec|, 0.5 mul:4
+// GFX12: encoding: [0x05,0x03,0x28,0xd6,0xfd,0xfc,0xc0,0x73]
+
+v_div_fixup_f64 v[254:255], 0xaf123456, -|vcc|, -1 clamp div:2
+// GFX12: encoding: [0xfe,0x82,0x28,0xd6,0xff,0xd4,0x04,0x5b,0x56,0x34,0x12,0xaf]
+
+v_div_fmas_f32 v5, vcc_lo, v2, vcc_lo
+// W32: encoding: [0x05,0x00,0x37,0xd6,0x6a,0x04,0xaa,0x01]
+
+v_div_fmas_f32 v5, ttmp15, ttmp15, ttmp15
+// W32: encoding: [0x05,0x00,0x37,0xd6,0x7b,0xf6,0xec,0x01]
+
+v_div_fmas_f32 v5, -|m0|, -|v255|, v3
+// W32: encoding: [0x05,0x03,0x37,0xd6,0x7d,0xfe,0x0f,0x64]
+
+v_div_fmas_f32 v5, -|exec_lo|, -|exec_lo|, -|exec_lo|
+// W32: encoding: [0x05,0x07,0x37,0xd6,0x7e,0xfc,0xf8,0xe1]
+
+v_div_fmas_f32 v5, -|exec_hi|, 0.5, -|v255|
+// W32: encoding: [0x05,0x05,0x37,0xd6,0x7f,0xe0,0xfd,0xa7]
+
+v_div_fmas_f32 v5, null, exec_hi, -|exec_hi|
+// W32: encoding: [0x05,0x04,0x37,0xd6,0x7c,0xfe,0xfc,0x81]
+
+v_div_fmas_f32 v5, -1, -|m0|, -|m0|
+// W32: encoding: [0x05,0x06,0x37,0xd6,0xc1,0xfa,0xf4,0xc1]
+
+v_div_fmas_f32 v5, 0.5, -|vcc_lo|, 0.5 mul:2
+// W32: encoding: [0x05,0x02,0x37,0xd6,0xf0,0xd4,0xc0,0x4b]
+
+v_div_fmas_f32 v5, vcc_lo, v2, v3
+// W64: encoding: [0x05,0x00,0x37,0xd6,0x6a,0x04,0x0e,0x04]
+
+v_div_fmas_f32 v5, vcc_hi, v255, vcc_hi
+// W64: encoding: [0x05,0x00,0x37,0xd6,0x6b,0xfe,0xaf,0x01]
+
+v_div_fmas_f32 v5, -|ttmp15|, -|ttmp15|, ttmp15
+// W64: encoding: [0x05,0x03,0x37,0xd6,0x7b,0xf6,0xec,0x61]
+
+v_div_fmas_f32 v5, m0, 0.5, v255
+// W64: encoding: [0x05,0x00,0x37,0xd6,0x7d,0xe0,0xfd,0x07]
+
+v_div_fmas_f32 v5, -|exec_lo|, exec_lo, -|exec_lo|
+// W64: encoding: [0x05,0x05,0x37,0xd6,0x7e,0xfc,0xf8,0xa1]
+
+v_div_fmas_f32 v5, -|exec_hi|, -|exec_hi|, -|exec_hi|
+// W64: encoding: [0x05,0x07,0x37,0xd6,0x7f,0xfe,0xfc,0xe1]
+
+v_div_fmas_f32 v5, null, m0, -|m0|
+// W64: encoding: [0x05,0x04,0x37,0xd6,0x7c,0xfa,0xf4,0x81]
+
+v_div_fmas_f32 v5, -1, -|vcc_lo|, -|vcc_lo|
+// W64: encoding: [0x05,0x06,0x37,0xd6,0xc1,0xd4,0xa8,0xc1]
+
+v_div_fmas_f32 v5, 0.5, -|vcc_hi|, 0.5 mul:2
+// W64: encoding: [0x05,0x02,0x37,0xd6,0xf0,0xd6,0xc0,0x4b]
+
+v_div_fmas_f32 v5, v1, 0xaf123456, 0xaf123456
+// GFX12: encoding: [0x05,0x00,0x37,0xd6,0x01,0xff,0xfd,0x03,0x56,0x34,0x12,0xaf]
+
+v_div_fmas_f32 v5, v255, src_scc, src_scc
+// GFX12: encoding: [0x05,0x00,0x37,0xd6,0xff,0xfb,0xf5,0x03]
+
+v_div_fmas_f32 v5, s105, s105, s105
+// GFX12: encoding: [0x05,0x00,0x37,0xd6,0x69,0xd2,0xa4,0x01]
+
+v_div_fmas_f32 v5, src_scc, -1, -1 mul:4
+// GFX12: encoding: [0x05,0x00,0x37,0xd6,0xfd,0x82,0x05,0x13]
+
+v_div_fmas_f32 v255, -|0xaf123456|, null, null clamp div:2
+// GFX12: encoding: [0xff,0x81,0x37,0xd6,0xff,0xf8,0xf0,0x39,0x56,0x34,0x12,0xaf]
+
+v_div_fmas_f64 v[5:6], v[1:2], 0xaf123456, 0xaf123456
+// GFX12: encoding: [0x05,0x00,0x38,0xd6,0x01,0xff,0xfd,0x03,0x56,0x34,0x12,0xaf]
+
+v_div_fmas_f64 v[5:6], v[254:255], src_scc, v[3:4]
+// GFX12: encoding: [0x05,0x00,0x38,0xd6,0xfe,0xfb,0x0d,0x04]
+
+v_div_fmas_f64 v[5:6], s[104:105], |s[104:105]|, s[104:105]
+// GFX12: encoding: [0x05,0x02,0x38,0xd6,0x68,0xd0,0xa0,0x01]
+
+v_div_fmas_f64 v[5:6], -|vcc|, v[2:3], -|v[254:255]|
+// GFX12: encoding: [0x05,0x05,0x38,0xd6,0x6a,0x04,0xfa,0xa7]
+
+v_div_fmas_f64 v[5:6], -|ttmp[14:15]|, -|ttmp[14:15]|, -|ttmp[14:15]|
+// GFX12: encoding: [0x05,0x07,0x38,0xd6,0x7a,0xf4,0xe8,0xe1]
+
+v_div_fmas_f64 v[5:6], -|exec|, -|v[254:255]|, null
+// GFX12: encoding: [0x05,0x03,0x38,0xd6,0x7e,0xfc,0xf3,0x61]
+
+v_div_fmas_f64 v[5:6], null, 0.5, -src_scc
+// GFX12: encoding: [0x05,0x00,0x38,0xd6,0x7c,0xe0,0xf5,0x83]
+
+v_div_fmas_f64 v[5:6], -1, -exec, |exec|
+// GFX12: encoding: [0x05,0x04,0x38,0xd6,0xc1,0xfc,0xf8,0x41]
+
+v_div_fmas_f64 v[5:6], 0.5, -|vcc|, -|vcc| mul:2
+// GFX12: encoding: [0x05,0x06,0x38,0xd6,0xf0,0xd4,0xa8,0xc9]
+
+v_div_fmas_f64 v[5:6], -|src_scc|, -1, 0.5 mul:4
+// GFX12: encoding: [0x05,0x01,0x38,0xd6,0xfd,0x82,0xc1,0x33]
+
+v_div_fmas_f64 v[254:255], 0xaf123456, null, -1 clamp div:2
+// GFX12: encoding: [0xfe,0x80,0x38,0xd6,0xff,0xf8,0x04,0x1b,0x56,0x34,0x12,0xaf]
+
+v_div_scale_f32 v5, vcc_lo, v1, v2, s3
+// W32: encoding: [0x05,0x6a,0xfc,0xd6,0x01,0x05,0x0e,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_div_scale_f32 v5, vcc_lo, v255, s2, s105
+// W32: encoding: [0x05,0x6a,0xfc,0xd6,0xff,0x05,0xa4,0x01]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_div_scale_f32 v5, vcc_lo, s1, v255, exec_hi
+// W32: encoding: [0x05,0x6a,0xfc,0xd6,0x01,0xfe,0xff,0x01]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_div_scale_f32 v5, vcc_lo, s105, s105, exec_lo
+// W32: encoding: [0x05,0x6a,0xfc,0xd6,0x69,0xd2,0xf8,0x01]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_div_scale_f32 v5, vcc_lo, vcc_lo, ttmp15, v3
+// W32: encoding: [0x05,0x6a,0xfc,0xd6,0x6a,0xf6,0x0c,0x04]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_div_scale_f32 v5, vcc_lo, vcc_hi, 0xaf123456, v255
+// W32: encoding: [0x05,0x6a,0xfc,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_div_scale_f32 v5, vcc_lo, -ttmp15, -src_scc, -ttmp15
+// W32: encoding: [0x05,0x6a,0xfc,0xd6,0x7b,0xfa,0xed,0xe1]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_div_scale_f32 v5, vcc_lo, m0, 0.5, m0
+// W32: encoding: [0x05,0x6a,0xfc,0xd6,0x7d,0xe0,0xf5,0x01]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_div_scale_f32 v5, vcc_lo, exec_lo, -1, vcc_hi
+// W32: encoding: [0x05,0x6a,0xfc,0xd6,0x7e,0x82,0xad,0x01]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_div_scale_f32 v5, vcc_lo, -exec_hi, null, -vcc_lo
+// W32: encoding: [0x05,0x6a,0xfc,0xd6,0x7f,0xf8,0xa8,0xa1]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_div_scale_f32 v5, vcc_lo, null, exec_lo, neg(0xaf123456)
+// W32: encoding: [0x05,0x6a,0xfc,0xd6,0x7c,0xfc,0xfc,0x83,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_div_scale_f32 v5, vcc_lo, -1, -exec_hi, -src_scc
+// W32: encoding: [0x05,0x6a,0xfc,0xd6,0xc1,0xfe,0xf4,0xc3]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_div_scale_f32 v5, vcc_lo, 0.5, -m0, 0.5 mul:2
+// W32: encoding: [0x05,0x6a,0xfc,0xd6,0xf0,0xfa,0xc0,0x4b]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_div_scale_f32 v5, vcc_lo, -src_scc, vcc_lo, -1 mul:4
+// W32: encoding: [0x05,0x6a,0xfc,0xd6,0xfd,0xd4,0x04,0x33]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_div_scale_f32 v255, vcc_lo, neg(0xaf123456), -vcc_hi, null clamp div:2
+// W32: encoding: [0xff,0xea,0xfc,0xd6,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_div_scale_f32 v5, vcc, v1, v2, s3
+// W64: encoding: [0x05,0x6a,0xfc,0xd6,0x01,0x05,0x0e,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_div_scale_f32 v5, vcc, v255, s2, s105
+// W64: encoding: [0x05,0x6a,0xfc,0xd6,0xff,0x05,0xa4,0x01]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_div_scale_f32 v5, vcc, s1, v255, exec_hi
+// W64: encoding: [0x05,0x6a,0xfc,0xd6,0x01,0xfe,0xff,0x01]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_div_scale_f32 v5, vcc, s105, s105, exec_lo
+// W64: encoding: [0x05,0x6a,0xfc,0xd6,0x69,0xd2,0xf8,0x01]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_div_scale_f32 v5, vcc, vcc_lo, ttmp15, v3
+// W64: encoding: [0x05,0x6a,0xfc,0xd6,0x6a,0xf6,0x0c,0x04]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_div_scale_f32 v5, vcc, vcc_hi, 0xaf123456, v255
+// W64: encoding: [0x05,0x6a,0xfc,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_div_scale_f32 v5, vcc, -ttmp15, -src_scc, -ttmp15
+// W64: encoding: [0x05,0x6a,0xfc,0xd6,0x7b,0xfa,0xed,0xe1]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_div_scale_f32 v5, vcc, m0, 0.5, m0
+// W64: encoding: [0x05,0x6a,0xfc,0xd6,0x7d,0xe0,0xf5,0x01]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_div_scale_f32 v5, vcc, exec_lo, -1, vcc_hi
+// W64: encoding: [0x05,0x6a,0xfc,0xd6,0x7e,0x82,0xad,0x01]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_div_scale_f32 v5, vcc, -exec_hi, null, -vcc_lo
+// W64: encoding: [0x05,0x6a,0xfc,0xd6,0x7f,0xf8,0xa8,0xa1]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_div_scale_f32 v5, vcc, null, exec_lo, neg(0xaf123456)
+// W64: encoding: [0x05,0x6a,0xfc,0xd6,0x7c,0xfc,0xfc,0x83,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_div_scale_f32 v5, vcc, -1, -exec_hi, -src_scc
+// W64: encoding: [0x05,0x6a,0xfc,0xd6,0xc1,0xfe,0xf4,0xc3]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_div_scale_f32 v5, vcc, 0.5, -m0, 0.5 mul:2
+// W64: encoding: [0x05,0x6a,0xfc,0xd6,0xf0,0xfa,0xc0,0x4b]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_div_scale_f32 v5, vcc, -src_scc, vcc_lo, -1 mul:4
+// W64: encoding: [0x05,0x6a,0xfc,0xd6,0xfd,0xd4,0x04,0x33]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_div_scale_f32 v255, vcc, neg(0xaf123456), -vcc_hi, null clamp div:2
+// W64: encoding: [0xff,0xea,0xfc,0xd6,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_div_scale_f64 v[5:6], vcc_lo, v[1:2], v[2:3], v[3:4]
+// W32: encoding: [0x05,0x6a,0xfd,0xd6,0x01,0x05,0x0e,0x04]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_div_scale_f64 v[5:6], vcc_lo, v[254:255], v[254:255], s[6:7]
+// W32: encoding: [0x05,0x6a,0xfd,0xd6,0xfe,0xfd,0x1b,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_div_scale_f64 v[5:6], vcc_lo, s[2:3], s[4:5], v[254:255]
+// W32: encoding: [0x05,0x6a,0xfd,0xd6,0x02,0x08,0xf8,0x07]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_div_scale_f64 v[5:6], vcc_lo, -s[104:105], s[104:105], -s[104:105]
+// W32: encoding: [0x05,0x6a,0xfd,0xd6,0x68,0xd0,0xa0,0xa1]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_div_scale_f64 v[5:6], vcc_lo, vcc, -ttmp[14:15], -ttmp[14:15]
+// W32: encoding: [0x05,0x6a,0xfd,0xd6,0x6a,0xf4,0xe8,0xc1]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_div_scale_f64 v[5:6], vcc_lo, -ttmp[14:15], 0xaf123456, null
+// W32: encoding: [0x05,0x6a,0xfd,0xd6,0x7a,0xfe,0xf1,0x21,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_div_scale_f64 v[5:6], vcc_lo, -exec, -src_scc, -exec
+// W32: encoding: [0x05,0x6a,0xfd,0xd6,0x7e,0xfa,0xf9,0xe1]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_div_scale_f64 v[5:6], vcc_lo, null, 0.5, vcc
+// W32: encoding: [0x05,0x6a,0xfd,0xd6,0x7c,0xe0,0xa9,0x01]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_div_scale_f64 v[5:6], vcc_lo, -1, -1, 0xaf123456
+// W32: encoding: [0x05,0x6a,0xfd,0xd6,0xc1,0x82,0xfd,0x03,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_div_scale_f64 v[5:6], vcc_lo, 0.5, null, -src_scc mul:2
+// W32: encoding: [0x05,0x6a,0xfd,0xd6,0xf0,0xf8,0xf4,0x8b]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_div_scale_f64 v[5:6], vcc_lo, -src_scc, -exec, 0.5 mul:4
+// W32: encoding: [0x05,0x6a,0xfd,0xd6,0xfd,0xfc,0xc0,0x73]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_div_scale_f64 v[254:255], vcc_lo, 0xaf123456, -vcc, -1 clamp div:2
+// W32: encoding: [0xfe,0xea,0xfd,0xd6,0xff,0xd4,0x04,0x5b,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_div_scale_f64 v[5:6], vcc, v[1:2], v[2:3], v[3:4]
+// W64: encoding: [0x05,0x6a,0xfd,0xd6,0x01,0x05,0x0e,0x04]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_div_scale_f64 v[5:6], vcc, v[254:255], v[254:255], s[6:7]
+// W64: encoding: [0x05,0x6a,0xfd,0xd6,0xfe,0xfd,0x1b,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_div_scale_f64 v[5:6], vcc, s[2:3], s[4:5], v[254:255]
+// W64: encoding: [0x05,0x6a,0xfd,0xd6,0x02,0x08,0xf8,0x07]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_div_scale_f64 v[5:6], vcc, -s[104:105], s[104:105], -s[104:105]
+// W64: encoding: [0x05,0x6a,0xfd,0xd6,0x68,0xd0,0xa0,0xa1]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_div_scale_f64 v[5:6], vcc, vcc, -ttmp[14:15], -ttmp[14:15]
+// W64: encoding: [0x05,0x6a,0xfd,0xd6,0x6a,0xf4,0xe8,0xc1]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_div_scale_f64 v[5:6], vcc, -ttmp[14:15], 0xaf123456, null
+// W64: encoding: [0x05,0x6a,0xfd,0xd6,0x7a,0xfe,0xf1,0x21,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_div_scale_f64 v[5:6], vcc, -exec, -src_scc, -exec
+// W64: encoding: [0x05,0x6a,0xfd,0xd6,0x7e,0xfa,0xf9,0xe1]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_div_scale_f64 v[5:6], vcc, null, 0.5, vcc
+// W64: encoding: [0x05,0x6a,0xfd,0xd6,0x7c,0xe0,0xa9,0x01]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_div_scale_f64 v[5:6], vcc, -1, -1, 0xaf123456
+// W64: encoding: [0x05,0x6a,0xfd,0xd6,0xc1,0x82,0xfd,0x03,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_div_scale_f64 v[5:6], vcc, 0.5, null, -src_scc mul:2
+// W64: encoding: [0x05,0x6a,0xfd,0xd6,0xf0,0xf8,0xf4,0x8b]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_div_scale_f64 v[5:6], vcc, -src_scc, -exec, 0.5 mul:4
+// W64: encoding: [0x05,0x6a,0xfd,0xd6,0xfd,0xfc,0xc0,0x73]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_div_scale_f64 v[254:255], vcc, 0xaf123456, -vcc, -1 clamp div:2
+// W64: encoding: [0xfe,0xea,0xfd,0xd6,0xff,0xd4,0x04,0x5b,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_dot2_bf16_bf16 v5, v1, v2, s3
+// GFX12: encoding: [0x05,0x00,0x67,0xd6,0x01,0x05,0x0e,0x00]
+
+v_dot2_bf16_bf16 v5, v255, v255, s105
+// GFX12: encoding: [0x05,0x00,0x67,0xd6,0xff,0xff,0xa7,0x01]
+
+v_dot2_bf16_bf16 v5, s1, s2, v3
+// GFX12: encoding: [0x05,0x00,0x67,0xd6,0x01,0x04,0x0c,0x04]
+
+v_dot2_bf16_bf16 v5, s105, s105, m0
+// GFX12: encoding: [0x05,0x00,0x67,0xd6,0x69,0xd2,0xf4,0x01]
+
+v_dot2_bf16_bf16 v5, vcc_lo, ttmp15, v255
+// GFX12: encoding: [0x05,0x00,0x67,0xd6,0x6a,0xf6,0xfc,0x07]
+
+v_dot2_bf16_bf16 v5, vcc_hi, 0xfe0b, vcc_hi
+// GFX12: encoding: [0x05,0x00,0x67,0xd6,0x6b,0xfe,0xad,0x01,0x0b,0xfe,0x00,0x00]
+
+v_dot2_bf16_bf16 v5, ttmp15, src_scc, ttmp15
+// GFX12: encoding: [0x05,0x00,0x67,0xd6,0x7b,0xfa,0xed,0x01]
+
+v_dot2_bf16_bf16 v5, |m0|, -1, -vcc_lo
+// GFX12: encoding: [0x05,0x01,0x67,0xd6,0x7d,0x82,0xa9,0x81]
+
+v_dot2_bf16_bf16 v5, -|exec_lo|, null, -|0xfe0b|
+// GFX12: encoding: [0x05,0x05,0x67,0xd6,0x7e,0xf8,0xfc,0xa3,0x0b,0xfe,0x00,0x00]
+
+v_dot2_bf16_bf16 v5, -|exec_hi|, -|exec_lo|, -|exec_lo|
+// GFX12: encoding: [0x05,0x07,0x67,0xd6,0x7f,0xfc,0xf8,0xe1]
+
+v_dot2_bf16_bf16 v5, null, -exec_hi, |src_scc|
+// GFX12: encoding: [0x05,0x04,0x67,0xd6,0x7c,0xfe,0xf4,0x43]
+
+v_dot2_bf16_bf16 v5, -1, -|m0|, -|exec_hi| op_sel:[0,0,0,0]
+// GFX12: encoding: [0x05,0x06,0x67,0xd6,0xc1,0xfa,0xfc,0xc1]
+
+v_dot2_bf16_bf16 v5, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0]
+// GFX12: encoding: [0x05,0x22,0x67,0xd6,0xfd,0xd4,0x04,0x23]
+
+v_dot2_bf16_bf16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1]
+// GFX12: encoding: [0xff,0x43,0x67,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00]
+
+v_dot2_f16_f16 v5, v1, v2, s3
+// GFX12: encoding: [0x05,0x00,0x66,0xd6,0x01,0x05,0x0e,0x00]
+
+v_dot2_f16_f16 v5, v255, s2, s105
+// GFX12: encoding: [0x05,0x00,0x66,0xd6,0xff,0x05,0xa4,0x01]
+
+v_dot2_f16_f16 v5, s1, v255, exec_hi
+// GFX12: encoding: [0x05,0x00,0x66,0xd6,0x01,0xfe,0xff,0x01]
+
+v_dot2_f16_f16 v5, s105, s105, exec_lo
+// GFX12: encoding: [0x05,0x00,0x66,0xd6,0x69,0xd2,0xf8,0x01]
+
+v_dot2_f16_f16 v5, vcc_lo, ttmp15, v3
+// GFX12: encoding: [0x05,0x00,0x66,0xd6,0x6a,0xf6,0x0c,0x04]
+
+v_dot2_f16_f16 v5, vcc_hi, 0xfe0b, v255
+// GFX12: encoding: [0x05,0x00,0x66,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00]
+
+v_dot2_f16_f16 v5, -|ttmp15|, -|src_scc|, -|ttmp15|
+// GFX12: encoding: [0x05,0x07,0x66,0xd6,0x7b,0xfa,0xed,0xe1]
+
+v_dot2_f16_f16 v5, m0, 0.5, m0
+// GFX12: encoding: [0x05,0x00,0x66,0xd6,0x7d,0xe0,0xf5,0x01]
+
+v_dot2_f16_f16 v5, |exec_lo|, -1, vcc_hi
+// GFX12: encoding: [0x05,0x01,0x66,0xd6,0x7e,0x82,0xad,0x01]
+
+v_dot2_f16_f16 v5, -|exec_hi|, null, -|vcc_lo|
+// GFX12: encoding: [0x05,0x05,0x66,0xd6,0x7f,0xf8,0xa8,0xa1]
+
+v_dot2_f16_f16 v5, null, exec_lo, -|0xfe0b|
+// GFX12: encoding: [0x05,0x04,0x66,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00]
+
+v_dot2_f16_f16 v5, -1, -|exec_hi|, -|src_scc|
+// GFX12: encoding: [0x05,0x06,0x66,0xd6,0xc1,0xfe,0xf4,0xc3]
+
+v_dot2_f16_f16 v5, 0.5, -m0, 0.5 op_sel:[0,0,0,0]
+// GFX12: encoding: [0x05,0x00,0x66,0xd6,0xf0,0xfa,0xc0,0x43]
+
+v_dot2_f16_f16 v5, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0]
+// GFX12: encoding: [0x05,0x22,0x66,0xd6,0xfd,0xd4,0x04,0x23]
+
+v_dot2_f16_f16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1]
+// GFX12: encoding: [0xff,0x43,0x66,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00]
+
+v_fma_dx9_zero_f32 v5, v1, v2, s3
+// GFX12: encoding: [0x05,0x00,0x09,0xd6,0x01,0x05,0x0e,0x00]
+
+v_fma_dx9_zero_f32 v5, v255, s2, s105
+// GFX12: encoding: [0x05,0x00,0x09,0xd6,0xff,0x05,0xa4,0x01]
+
+v_fma_dx9_zero_f32 v5, s1, v255, exec_hi
+// GFX12: encoding: [0x05,0x00,0x09,0xd6,0x01,0xfe,0xff,0x01]
+
+v_fma_dx9_zero_f32 v5, s105, s105, exec_lo
+// GFX12: encoding: [0x05,0x00,0x09,0xd6,0x69,0xd2,0xf8,0x01]
+
+v_fma_dx9_zero_f32 v5, vcc_lo, ttmp15, v3
+// GFX12: encoding: [0x05,0x00,0x09,0xd6,0x6a,0xf6,0x0c,0x04]
+
+v_fma_dx9_zero_f32 v5, vcc_hi, 0xaf123456, v255
+// GFX12: encoding: [0x05,0x00,0x09,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf]
+
+v_fma_dx9_zero_f32 v5, -|ttmp15|, -|src_scc|, -|ttmp15|
+// GFX12: encoding: [0x05,0x07,0x09,0xd6,0x7b,0xfa,0xed,0xe1]
+
+v_fma_dx9_zero_f32 v5, m0, 0.5, m0
+// GFX12: encoding: [0x05,0x00,0x09,0xd6,0x7d,0xe0,0xf5,0x01]
+
+v_fma_dx9_zero_f32 v5, |exec_lo|, -1, vcc_hi
+// GFX12: encoding: [0x05,0x01,0x09,0xd6,0x7e,0x82,0xad,0x01]
+
+v_fma_dx9_zero_f32 v5, -|exec_hi|, null, -|vcc_lo|
+// GFX12: encoding: [0x05,0x05,0x09,0xd6,0x7f,0xf8,0xa8,0xa1]
+
+v_fma_dx9_zero_f32 v5, null, exec_lo, -|0xaf123456|
+// GFX12: encoding: [0x05,0x04,0x09,0xd6,0x7c,0xfc,0xfc,0x83,0x56,0x34,0x12,0xaf]
+
+v_fma_dx9_zero_f32 v5, -1, -|exec_hi|, -|src_scc|
+// GFX12: encoding: [0x05,0x06,0x09,0xd6,0xc1,0xfe,0xf4,0xc3]
+
+v_fma_dx9_zero_f32 v5, 0.5, -m0, 0.5 mul:2
+// GFX12: encoding: [0x05,0x00,0x09,0xd6,0xf0,0xfa,0xc0,0x4b]
+
+v_fma_dx9_zero_f32 v5, -src_scc, |vcc_lo|, -1 mul:4
+// GFX12: encoding: [0x05,0x02,0x09,0xd6,0xfd,0xd4,0x04,0x33]
+
+v_fma_dx9_zero_f32 v255, -|0xaf123456|, -|vcc_hi|, null clamp div:2
+// GFX12: encoding: [0xff,0x83,0x09,0xd6,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf]
+
+v_fma_f16 v5, v1, v2, s3
+// GFX12: encoding: [0x05,0x00,0x48,0xd6,0x01,0x05,0x0e,0x00]
+
+v_fma_f16 v5, v255, s2, s105
+// GFX12: encoding: [0x05,0x00,0x48,0xd6,0xff,0x05,0xa4,0x01]
+
+v_fma_f16 v5, s1, v255, exec_hi
+// GFX12: encoding: [0x05,0x00,0x48,0xd6,0x01,0xfe,0xff,0x01]
+
+v_fma_f16 v5, s105, s105, exec_lo
+// GFX12: encoding: [0x05,0x00,0x48,0xd6,0x69,0xd2,0xf8,0x01]
+
+v_fma_f16 v5, vcc_lo, ttmp15, v3
+// GFX12: encoding: [0x05,0x00,0x48,0xd6,0x6a,0xf6,0x0c,0x04]
+
+v_fma_f16 v5, vcc_hi, 0xfe0b, v255
+// GFX12: encoding: [0x05,0x00,0x48,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00]
+
+v_fma_f16 v5, -|ttmp15|, -|src_scc|, -|ttmp15|
+// GFX12: encoding: [0x05,0x07,0x48,0xd6,0x7b,0xfa,0xed,0xe1]
+
+v_fma_f16 v5, m0, 0.5, m0
+// GFX12: encoding: [0x05,0x00,0x48,0xd6,0x7d,0xe0,0xf5,0x01]
+
+v_fma_f16 v5, |exec_lo|, -1, vcc_hi
+// GFX12: encoding: [0x05,0x01,0x48,0xd6,0x7e,0x82,0xad,0x01]
+
+v_fma_f16 v5, -|exec_hi|, null, -|vcc_lo| op_sel:[1,1,1,1]
+// GFX12: encoding: [0x05,0x7d,0x48,0xd6,0x7f,0xf8,0xa8,0xa1]
+
+v_fma_f16 v5, null, exec_lo, -|0xfe0b| op_sel:[0,0,0,0]
+// GFX12: encoding: [0x05,0x04,0x48,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00]
+
+v_fma_f16 v5, -1, -|exec_hi|, -|src_scc| op_sel:[1,0,0,0]
+// GFX12: encoding: [0x05,0x0e,0x48,0xd6,0xc1,0xfe,0xf4,0xc3]
+
+v_fma_f16 v5, 0.5, -m0, 0.5 op_sel:[0,1,0,0]
+// GFX12: encoding: [0x05,0x10,0x48,0xd6,0xf0,0xfa,0xc0,0x43]
+
+v_fma_f16 v5, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0]
+// GFX12: encoding: [0x05,0x22,0x48,0xd6,0xfd,0xd4,0x04,0x23]
+
+v_fma_f16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp
+// GFX12: encoding: [0xff,0xc3,0x48,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00]
+
+v_fma_f32 v5, v1, v2, s3
+// GFX12: encoding: [0x05,0x00,0x13,0xd6,0x01,0x05,0x0e,0x00]
+
+v_fma_f32 v5, v255, s2, s105
+// GFX12: encoding: [0x05,0x00,0x13,0xd6,0xff,0x05,0xa4,0x01]
+
+v_fma_f32 v5, s1, v255, exec_hi
+// GFX12: encoding: [0x05,0x00,0x13,0xd6,0x01,0xfe,0xff,0x01]
+
+v_fma_f32 v5, s105, s105, exec_lo
+// GFX12: encoding: [0x05,0x00,0x13,0xd6,0x69,0xd2,0xf8,0x01]
+
+v_fma_f32 v5, vcc_lo, ttmp15, v3
+// GFX12: encoding: [0x05,0x00,0x13,0xd6,0x6a,0xf6,0x0c,0x04]
+
+v_fma_f32 v5, vcc_hi, 0xaf123456, v255
+// GFX12: encoding: [0x05,0x00,0x13,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf]
+
+v_fma_f32 v5, -|ttmp15|, -|src_scc|, -|ttmp15|
+// GFX12: encoding: [0x05,0x07,0x13,0xd6,0x7b,0xfa,0xed,0xe1]
+
+v_fma_f32 v5, m0, 0.5, m0
+// GFX12: encoding: [0x05,0x00,0x13,0xd6,0x7d,0xe0,0xf5,0x01]
+
+v_fma_f32 v5, |exec_lo|, -1, vcc_hi
+// GFX12: encoding: [0x05,0x01,0x13,0xd6,0x7e,0x82,0xad,0x01]
+
+v_fma_f32 v5, -|exec_hi|, null, -|vcc_lo|
+// GFX12: encoding: [0x05,0x05,0x13,0xd6,0x7f,0xf8,0xa8,0xa1]
+
+v_fma_f32 v5, null, exec_lo, -|0xaf123456|
+// GFX12: encoding: [0x05,0x04,0x13,0xd6,0x7c,0xfc,0xfc,0x83,0x56,0x34,0x12,0xaf]
+
+v_fma_f32 v5, -1, -|exec_hi|, -|src_scc|
+// GFX12: encoding: [0x05,0x06,0x13,0xd6,0xc1,0xfe,0xf4,0xc3]
+
+v_fma_f32 v5, 0.5, -m0, 0.5 mul:2
+// GFX12: encoding: [0x05,0x00,0x13,0xd6,0xf0,0xfa,0xc0,0x4b]
+
+v_fma_f32 v5, -src_scc, |vcc_lo|, -1 mul:4
+// GFX12: encoding: [0x05,0x02,0x13,0xd6,0xfd,0xd4,0x04,0x33]
+
+v_fma_f32 v255, -|0xaf123456|, -|vcc_hi|, null clamp div:2
+// GFX12: encoding: [0xff,0x83,0x13,0xd6,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf]
+
+v_fma_f64 v[5:6], v[1:2], v[2:3], v[3:4]
+// GFX12: encoding: [0x05,0x00,0x14,0xd6,0x01,0x05,0x0e,0x04]
+
+v_fma_f64 v[5:6], v[254:255], v[254:255], s[6:7]
+// GFX12: encoding: [0x05,0x00,0x14,0xd6,0xfe,0xfd,0x1b,0x00]
+
+v_fma_f64 v[5:6], s[2:3], s[4:5], v[254:255]
+// GFX12: encoding: [0x05,0x00,0x14,0xd6,0x02,0x08,0xf8,0x07]
+
+v_fma_f64 v[5:6], -|s[104:105]|, s[104:105], -|s[104:105]|
+// GFX12: encoding: [0x05,0x05,0x14,0xd6,0x68,0xd0,0xa0,0xa1]
+
+v_fma_f64 v[5:6], vcc, -|ttmp[14:15]|, -|ttmp[14:15]|
+// GFX12: encoding: [0x05,0x06,0x14,0xd6,0x6a,0xf4,0xe8,0xc1]
+
+v_fma_f64 v[5:6], -|ttmp[14:15]|, 0xaf123456, null
+// GFX12: encoding: [0x05,0x01,0x14,0xd6,0x7a,0xfe,0xf1,0x21,0x56,0x34,0x12,0xaf]
+
+v_fma_f64 v[5:6], -|exec|, -|src_scc|, -|exec|
+// GFX12: encoding: [0x05,0x07,0x14,0xd6,0x7e,0xfa,0xf9,0xe1]
+
+v_fma_f64 v[5:6], null, 0.5, vcc
+// GFX12: encoding: [0x05,0x00,0x14,0xd6,0x7c,0xe0,0xa9,0x01]
+
+v_fma_f64 v[5:6], -1, -1, 0xaf123456
+// GFX12: encoding: [0x05,0x00,0x14,0xd6,0xc1,0x82,0xfd,0x03,0x56,0x34,0x12,0xaf]
+
+v_fma_f64 v[5:6], 0.5, null, -|src_scc| mul:2
+// GFX12: encoding: [0x05,0x04,0x14,0xd6,0xf0,0xf8,0xf4,0x8b]
+
+v_fma_f64 v[5:6], -|src_scc|, -|exec|, 0.5 mul:4
+// GFX12: encoding: [0x05,0x03,0x14,0xd6,0xfd,0xfc,0xc0,0x73]
+
+v_fma_f64 v[254:255], 0xaf123456, -|vcc|, -1 clamp div:2
+// GFX12: encoding: [0xfe,0x82,0x14,0xd6,0xff,0xd4,0x04,0x5b,0x56,0x34,0x12,0xaf]
+
+v_fma_dx9_zero_f32 v5, v1, v2, s3
+// GFX12: encoding: [0x05,0x00,0x09,0xd6,0x01,0x05,0x0e,0x00]
+
+v_fma_dx9_zero_f32 v5, v255, s2, s105
+// GFX12: encoding: [0x05,0x00,0x09,0xd6,0xff,0x05,0xa4,0x01]
+
+v_fma_dx9_zero_f32 v5, s1, v255, exec_hi
+// GFX12: encoding: [0x05,0x00,0x09,0xd6,0x01,0xfe,0xff,0x01]
+
+v_fma_dx9_zero_f32 v5, s105, s105, exec_lo
+// GFX12: encoding: [0x05,0x00,0x09,0xd6,0x69,0xd2,0xf8,0x01]
+
+v_fma_dx9_zero_f32 v5, vcc_lo, ttmp15, v3
+// GFX12: encoding: [0x05,0x00,0x09,0xd6,0x6a,0xf6,0x0c,0x04]
+
+v_fma_dx9_zero_f32 v5, vcc_hi, 0xaf123456, v255
+// GFX12: encoding: [0x05,0x00,0x09,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf]
+
+v_fma_dx9_zero_f32 v5, -|ttmp15|, -|src_scc|, -|ttmp15|
+// GFX12: encoding: [0x05,0x07,0x09,0xd6,0x7b,0xfa,0xed,0xe1]
+
+v_fma_dx9_zero_f32 v5, m0, 0.5, m0
+// GFX12: encoding: [0x05,0x00,0x09,0xd6,0x7d,0xe0,0xf5,0x01]
+
+v_fma_dx9_zero_f32 v5, |exec_lo|, -1, vcc_hi
+// GFX12: encoding: [0x05,0x01,0x09,0xd6,0x7e,0x82,0xad,0x01]
+
+v_fma_dx9_zero_f32 v5, -|exec_hi|, null, -|vcc_lo|
+// GFX12: encoding: [0x05,0x05,0x09,0xd6,0x7f,0xf8,0xa8,0xa1]
+
+v_fma_dx9_zero_f32 v5, null, exec_lo, -|0xaf123456|
+// GFX12: encoding: [0x05,0x04,0x09,0xd6,0x7c,0xfc,0xfc,0x83,0x56,0x34,0x12,0xaf]
+
+v_fma_dx9_zero_f32 v5, -1, -|exec_hi|, -|src_scc|
+// GFX12: encoding: [0x05,0x06,0x09,0xd6,0xc1,0xfe,0xf4,0xc3]
+
+v_fma_dx9_zero_f32 v5, 0.5, -m0, 0.5 mul:2
+// GFX12: encoding: [0x05,0x00,0x09,0xd6,0xf0,0xfa,0xc0,0x4b]
+
+v_fma_dx9_zero_f32 v5, -src_scc, |vcc_lo|, -1 mul:4
+// GFX12: encoding: [0x05,0x02,0x09,0xd6,0xfd,0xd4,0x04,0x33]
+
+v_fma_dx9_zero_f32 v255, -|0xaf123456|, -|vcc_hi|, null clamp div:2
+// GFX12: encoding: [0xff,0x83,0x09,0xd6,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf]
+
+v_ldexp_f32 v5, v1, v2
+// GFX12: encoding: [0x05,0x00,0x1c,0xd7,0x01,0x05,0x02,0x00]
+
+v_ldexp_f32 v5, v255, v255
+// GFX12: encoding: [0x05,0x00,0x1c,0xd7,0xff,0xff,0x03,0x00]
+
+v_ldexp_f32 v5, s1, s2
+// GFX12: encoding: [0x05,0x00,0x1c,0xd7,0x01,0x04,0x00,0x00]
+
+v_ldexp_f32 v5, s105, s105
+// GFX12: encoding: [0x05,0x00,0x1c,0xd7,0x69,0xd2,0x00,0x00]
+
+v_ldexp_f32 v5, vcc_lo, ttmp15
+// GFX12: encoding: [0x05,0x00,0x1c,0xd7,0x6a,0xf6,0x00,0x00]
+
+v_ldexp_f32 v5, vcc_hi, 0xaf123456
+// GFX12: encoding: [0x05,0x00,0x1c,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+
+v_ldexp_f32 v5, ttmp15, src_scc
+// GFX12: encoding: [0x05,0x00,0x1c,0xd7,0x7b,0xfa,0x01,0x00]
+
+v_ldexp_f32 v5, m0, 0.5
+// GFX12: encoding: [0x05,0x00,0x1c,0xd7,0x7d,0xe0,0x01,0x00]
+
+v_ldexp_f32 v5, exec_lo, -1
+// GFX12: encoding: [0x05,0x00,0x1c,0xd7,0x7e,0x82,0x01,0x00]
+
+v_ldexp_f32 v5, exec_hi, null
+// GFX12: encoding: [0x05,0x00,0x1c,0xd7,0x7f,0xf8,0x00,0x00]
+
+v_ldexp_f32 v5, null, exec_lo
+// GFX12: encoding: [0x05,0x00,0x1c,0xd7,0x7c,0xfc,0x00,0x00]
+
+v_ldexp_f32 v5, -1, exec_hi
+// GFX12: encoding: [0x05,0x00,0x1c,0xd7,0xc1,0xfe,0x00,0x00]
+
+v_ldexp_f32 v5, 0.5, m0 mul:2
+// GFX12: encoding: [0x05,0x00,0x1c,0xd7,0xf0,0xfa,0x00,0x08]
+
+v_ldexp_f32 v5, src_scc, vcc_lo mul:4
+// GFX12: encoding: [0x05,0x00,0x1c,0xd7,0xfd,0xd4,0x00,0x10]
+
+v_ldexp_f32 v255, -|0xaf123456|, vcc_hi clamp div:2
+// GFX12: encoding: [0xff,0x81,0x1c,0xd7,0xff,0xd6,0x00,0x38,0x56,0x34,0x12,0xaf]
+
+v_ldexp_f64 v[5:6], v[1:2], v2
+// GFX12: encoding: [0x05,0x00,0x2b,0xd7,0x01,0x05,0x02,0x00]
+
+v_ldexp_f64 v[5:6], v[1:2], v255
+// GFX12: encoding: [0x05,0x00,0x2b,0xd7,0x01,0xff,0x03,0x00]
+
+v_ldexp_f64 v[5:6], v[1:2], s2
+// GFX12: encoding: [0x05,0x00,0x2b,0xd7,0x01,0x05,0x00,0x00]
+
+v_ldexp_f64 v[5:6], v[1:2], s105
+// GFX12: encoding: [0x05,0x00,0x2b,0xd7,0x01,0xd3,0x00,0x00]
+
+v_ldexp_f64 v[5:6], v[254:255], ttmp15
+// GFX12: encoding: [0x05,0x00,0x2b,0xd7,0xfe,0xf7,0x00,0x00]
+
+v_ldexp_f64 v[5:6], s[2:3], vcc_hi
+// GFX12: encoding: [0x05,0x00,0x2b,0xd7,0x02,0xd6,0x00,0x00]
+
+v_ldexp_f64 v[5:6], s[104:105], vcc_lo
+// GFX12: encoding: [0x05,0x00,0x2b,0xd7,0x68,0xd4,0x00,0x00]
+
+v_ldexp_f64 v[5:6], vcc, m0
+// GFX12: encoding: [0x05,0x00,0x2b,0xd7,0x6a,0xfa,0x00,0x00]
+
+v_ldexp_f64 v[5:6], ttmp[14:15], exec_hi
+// GFX12: encoding: [0x05,0x00,0x2b,0xd7,0x7a,0xfe,0x00,0x00]
+
+v_ldexp_f64 v[5:6], exec, exec_lo
+// GFX12: encoding: [0x05,0x00,0x2b,0xd7,0x7e,0xfc,0x00,0x00]
+
+v_ldexp_f64 v[5:6], null, null
+// GFX12: encoding: [0x05,0x00,0x2b,0xd7,0x7c,0xf8,0x00,0x00]
+
+v_ldexp_f64 v[5:6], -1, -1
+// GFX12: encoding: [0x05,0x00,0x2b,0xd7,0xc1,0x82,0x01,0x00]
+
+v_ldexp_f64 v[5:6], 0.5, 0.5 mul:2
+// GFX12: encoding: [0x05,0x00,0x2b,0xd7,0xf0,0xe0,0x01,0x08]
+
+v_ldexp_f64 v[5:6], -|src_scc|, src_scc mul:4
+// GFX12: encoding: [0x05,0x01,0x2b,0xd7,0xfd,0xfa,0x01,0x30]
+
+v_ldexp_f64 v[254:255], 0xaf123456, 0xaf123456 clamp div:2
+// GFX12: encoding: [0xfe,0x80,0x2b,0xd7,0xff,0xfe,0x01,0x18,0x56,0x34,0x12,0xaf]
+
+v_lerp_u8 v5, v1, v2, s3
+// GFX12: encoding: [0x05,0x00,0x15,0xd6,0x01,0x05,0x0e,0x00]
+
+v_lerp_u8 v5, v255, s2, s105
+// GFX12: encoding: [0x05,0x00,0x15,0xd6,0xff,0x05,0xa4,0x01]
+
+v_lerp_u8 v5, s1, v255, exec_hi
+// GFX12: encoding: [0x05,0x00,0x15,0xd6,0x01,0xfe,0xff,0x01]
+
+v_lerp_u8 v5, s105, s105, exec_lo
+// GFX12: encoding: [0x05,0x00,0x15,0xd6,0x69,0xd2,0xf8,0x01]
+
+v_lerp_u8 v5, vcc_lo, ttmp15, v3
+// GFX12: encoding: [0x05,0x00,0x15,0xd6,0x6a,0xf6,0x0c,0x04]
+
+v_lerp_u8 v5, vcc_hi, 0xaf123456, v255
+// GFX12: encoding: [0x05,0x00,0x15,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf]
+
+v_lerp_u8 v5, ttmp15, src_scc, ttmp15
+// GFX12: encoding: [0x05,0x00,0x15,0xd6,0x7b,0xfa,0xed,0x01]
+
+v_lerp_u8 v5, m0, 0.5, m0
+// GFX12: encoding: [0x05,0x00,0x15,0xd6,0x7d,0xe0,0xf5,0x01]
+
+v_lerp_u8 v5, exec_lo, -1, vcc_hi
+// GFX12: encoding: [0x05,0x00,0x15,0xd6,0x7e,0x82,0xad,0x01]
+
+v_lerp_u8 v5, exec_hi, null, vcc_lo
+// GFX12: encoding: [0x05,0x00,0x15,0xd6,0x7f,0xf8,0xa8,0x01]
+
+v_lerp_u8 v5, null, exec_lo, 0xaf123456
+// GFX12: encoding: [0x05,0x00,0x15,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf]
+
+v_lerp_u8 v5, -1, exec_hi, src_scc
+// GFX12: encoding: [0x05,0x00,0x15,0xd6,0xc1,0xfe,0xf4,0x03]
+
+v_lerp_u8 v5, 0.5, m0, 0.5
+// GFX12: encoding: [0x05,0x00,0x15,0xd6,0xf0,0xfa,0xc0,0x03]
+
+v_lerp_u8 v5, src_scc, vcc_lo, -1
+// GFX12: encoding: [0x05,0x00,0x15,0xd6,0xfd,0xd4,0x04,0x03]
+
+v_lerp_u8 v255, 0xaf123456, vcc_hi, null
+// GFX12: encoding: [0xff,0x00,0x15,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf]
+
+v_lshl_add_u32 v5, v1, v2, s3
+// GFX12: encoding: [0x05,0x00,0x46,0xd6,0x01,0x05,0x0e,0x00]
+
+v_lshl_add_u32 v5, v255, s2, s105
+// GFX12: encoding: [0x05,0x00,0x46,0xd6,0xff,0x05,0xa4,0x01]
+
+v_lshl_add_u32 v5, s1, v255, exec_hi
+// GFX12: encoding: [0x05,0x00,0x46,0xd6,0x01,0xfe,0xff,0x01]
+
+v_lshl_add_u32 v5, s105, s105, exec_lo
+// GFX12: encoding: [0x05,0x00,0x46,0xd6,0x69,0xd2,0xf8,0x01]
+
+v_lshl_add_u32 v5, vcc_lo, ttmp15, v3
+// GFX12: encoding: [0x05,0x00,0x46,0xd6,0x6a,0xf6,0x0c,0x04]
+
+v_lshl_add_u32 v5, vcc_hi, 0xaf123456, v255
+// GFX12: encoding: [0x05,0x00,0x46,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf]
+
+v_lshl_add_u32 v5, ttmp15, src_scc, ttmp15
+// GFX12: encoding: [0x05,0x00,0x46,0xd6,0x7b,0xfa,0xed,0x01]
+
+v_lshl_add_u32 v5, m0, 0.5, m0
+// GFX12: encoding: [0x05,0x00,0x46,0xd6,0x7d,0xe0,0xf5,0x01]
+
+v_lshl_add_u32 v5, exec_lo, -1, vcc_hi
+// GFX12: encoding: [0x05,0x00,0x46,0xd6,0x7e,0x82,0xad,0x01]
+
+v_lshl_add_u32 v5, exec_hi, null, vcc_lo
+// GFX12: encoding: [0x05,0x00,0x46,0xd6,0x7f,0xf8,0xa8,0x01]
+
+v_lshl_add_u32 v5, null, exec_lo, 0xaf123456
+// GFX12: encoding: [0x05,0x00,0x46,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf]
+
+v_lshl_add_u32 v5, -1, exec_hi, src_scc
+// GFX12: encoding: [0x05,0x00,0x46,0xd6,0xc1,0xfe,0xf4,0x03]
+
+v_lshl_add_u32 v5, 0.5, m0, 0.5
+// GFX12: encoding: [0x05,0x00,0x46,0xd6,0xf0,0xfa,0xc0,0x03]
+
+v_lshl_add_u32 v5, src_scc, vcc_lo, -1
+// GFX12: encoding: [0x05,0x00,0x46,0xd6,0xfd,0xd4,0x04,0x03]
+
+v_lshl_add_u32 v255, 0xaf123456, vcc_hi, null
+// GFX12: encoding: [0xff,0x00,0x46,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf]
+
+v_lshl_or_b32 v5, v1, v2, s3
+// GFX12: encoding: [0x05,0x00,0x56,0xd6,0x01,0x05,0x0e,0x00]
+
+v_lshl_or_b32 v5, v255, s2, s105
+// GFX12: encoding: [0x05,0x00,0x56,0xd6,0xff,0x05,0xa4,0x01]
+
+v_lshl_or_b32 v5, s1, v255, exec_hi
+// GFX12: encoding: [0x05,0x00,0x56,0xd6,0x01,0xfe,0xff,0x01]
+
+v_lshl_or_b32 v5, s105, s105, exec_lo
+// GFX12: encoding: [0x05,0x00,0x56,0xd6,0x69,0xd2,0xf8,0x01]
+
+v_lshl_or_b32 v5, vcc_lo, ttmp15, v3
+// GFX12: encoding: [0x05,0x00,0x56,0xd6,0x6a,0xf6,0x0c,0x04]
+
+v_lshl_or_b32 v5, vcc_hi, 0xaf123456, v255
+// GFX12: encoding: [0x05,0x00,0x56,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf]
+
+v_lshl_or_b32 v5, ttmp15, src_scc, ttmp15
+// GFX12: encoding: [0x05,0x00,0x56,0xd6,0x7b,0xfa,0xed,0x01]
+
+v_lshl_or_b32 v5, m0, 0.5, m0
+// GFX12: encoding: [0x05,0x00,0x56,0xd6,0x7d,0xe0,0xf5,0x01]
+
+v_lshl_or_b32 v5, exec_lo, -1, vcc_hi
+// GFX12: encoding: [0x05,0x00,0x56,0xd6,0x7e,0x82,0xad,0x01]
+
+v_lshl_or_b32 v5, exec_hi, null, vcc_lo
+// GFX12: encoding: [0x05,0x00,0x56,0xd6,0x7f,0xf8,0xa8,0x01]
+
+v_lshl_or_b32 v5, null, exec_lo, 0xaf123456
+// GFX12: encoding: [0x05,0x00,0x56,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf]
+
+v_lshl_or_b32 v5, -1, exec_hi, src_scc
+// GFX12: encoding: [0x05,0x00,0x56,0xd6,0xc1,0xfe,0xf4,0x03]
+
+v_lshl_or_b32 v5, 0.5, m0, 0.5
+// GFX12: encoding: [0x05,0x00,0x56,0xd6,0xf0,0xfa,0xc0,0x03]
+
+v_lshl_or_b32 v5, src_scc, vcc_lo, -1
+// GFX12: encoding: [0x05,0x00,0x56,0xd6,0xfd,0xd4,0x04,0x03]
+
+v_lshl_or_b32 v255, 0xaf123456, vcc_hi, null
+// GFX12: encoding: [0xff,0x00,0x56,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf]
+
+v_lshlrev_b16 v5, v1, v2
+// GFX12: encoding: [0x05,0x00,0x38,0xd7,0x01,0x05,0x02,0x00]
+
+v_lshlrev_b16 v5, v255, v255
+// GFX12: encoding: [0x05,0x00,0x38,0xd7,0xff,0xff,0x03,0x00]
+
+v_lshlrev_b16 v5, s1, s2
+// GFX12: encoding: [0x05,0x00,0x38,0xd7,0x01,0x04,0x00,0x00]
+
+v_lshlrev_b16 v5, s105, s105
+// GFX12: encoding: [0x05,0x00,0x38,0xd7,0x69,0xd2,0x00,0x00]
+
+v_lshlrev_b16 v5, vcc_lo, ttmp15
+// GFX12: encoding: [0x05,0x00,0x38,0xd7,0x6a,0xf6,0x00,0x00]
+
+v_lshlrev_b16 v5, vcc_hi, 0xfe0b
+// GFX12: encoding: [0x05,0x00,0x38,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+
+v_lshlrev_b16 v5, ttmp15, src_scc
+// GFX12: encoding: [0x05,0x00,0x38,0xd7,0x7b,0xfa,0x01,0x00]
+
+v_lshlrev_b16 v5, m0, 0.5
+// GFX12: encoding: [0x05,0x00,0x38,0xd7,0x7d,0xe0,0x01,0x00]
+
+v_lshlrev_b16 v5, exec_lo, -1
+// GFX12: encoding: [0x05,0x00,0x38,0xd7,0x7e,0x82,0x01,0x00]
+
+v_lshlrev_b16 v5, exec_hi, null
+// GFX12: encoding: [0x05,0x00,0x38,0xd7,0x7f,0xf8,0x00,0x00]
+
+v_lshlrev_b16 v5, null, exec_lo
+// GFX12: encoding: [0x05,0x00,0x38,0xd7,0x7c,0xfc,0x00,0x00]
+
+v_lshlrev_b16 v5, -1, exec_hi
+// GFX12: encoding: [0x05,0x00,0x38,0xd7,0xc1,0xfe,0x00,0x00]
+
+v_lshlrev_b16 v5, 0.5, m0
+// GFX12: encoding: [0x05,0x00,0x38,0xd7,0xf0,0xfa,0x00,0x00]
+
+v_lshlrev_b16 v5, src_scc, vcc_lo
+// GFX12: encoding: [0x05,0x00,0x38,0xd7,0xfd,0xd4,0x00,0x00]
+
+v_lshlrev_b16 v255, 0xfe0b, vcc_hi
+// GFX12: encoding: [0xff,0x00,0x38,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+
+v_lshrrev_b16 v5, v1, v2
+// GFX12: encoding: [0x05,0x00,0x39,0xd7,0x01,0x05,0x02,0x00]
+
+v_lshrrev_b16 v5, v255, v255
+// GFX12: encoding: [0x05,0x00,0x39,0xd7,0xff,0xff,0x03,0x00]
+
+v_lshrrev_b16 v5, s1, s2
+// GFX12: encoding: [0x05,0x00,0x39,0xd7,0x01,0x04,0x00,0x00]
+
+v_lshrrev_b16 v5, s105, s105
+// GFX12: encoding: [0x05,0x00,0x39,0xd7,0x69,0xd2,0x00,0x00]
+
+v_lshrrev_b16 v5, vcc_lo, ttmp15
+// GFX12: encoding: [0x05,0x00,0x39,0xd7,0x6a,0xf6,0x00,0x00]
+
+v_lshrrev_b16 v5, vcc_hi, 0xfe0b
+// GFX12: encoding: [0x05,0x00,0x39,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+
+v_lshrrev_b16 v5, ttmp15, src_scc
+// GFX12: encoding: [0x05,0x00,0x39,0xd7,0x7b,0xfa,0x01,0x00]
+
+v_lshrrev_b16 v5, m0, 0.5
+// GFX12: encoding: [0x05,0x00,0x39,0xd7,0x7d,0xe0,0x01,0x00]
+
+v_lshrrev_b16 v5, exec_lo, -1
+// GFX12: encoding: [0x05,0x00,0x39,0xd7,0x7e,0x82,0x01,0x00]
+
+v_lshrrev_b16 v5, exec_hi, null
+// GFX12: encoding: [0x05,0x00,0x39,0xd7,0x7f,0xf8,0x00,0x00]
+
+v_lshrrev_b16 v5, null, exec_lo
+// GFX12: encoding: [0x05,0x00,0x39,0xd7,0x7c,0xfc,0x00,0x00]
+
+v_lshrrev_b16 v5, -1, exec_hi
+// GFX12: encoding: [0x05,0x00,0x39,0xd7,0xc1,0xfe,0x00,0x00]
+
+v_lshrrev_b16 v5, 0.5, m0
+// GFX12: encoding: [0x05,0x00,0x39,0xd7,0xf0,0xfa,0x00,0x00]
+
+v_lshrrev_b16 v5, src_scc, vcc_lo
+// GFX12: encoding: [0x05,0x00,0x39,0xd7,0xfd,0xd4,0x00,0x00]
+
+v_lshrrev_b16 v255, 0xfe0b, vcc_hi
+// GFX12: encoding: [0xff,0x00,0x39,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+
+v_lshrrev_b64 v[5:6], v1, vcc
+// GFX12: encoding: [0x05,0x00,0x3d,0xd7,0x01,0xd5,0x00,0x00]
+
+v_lshrrev_b64 v[5:6], v255, exec
+// GFX12: encoding: [0x05,0x00,0x3d,0xd7,0xff,0xfd,0x00,0x00]
+
+v_lshrrev_b64 v[5:6], exec_lo, v[2:3]
+// GFX12: encoding: [0x05,0x00,0x3d,0xd7,0x7e,0x04,0x02,0x00]
+
+v_lshrrev_b64 v[5:6], exec_hi, v[254:255]
+// GFX12: encoding: [0x05,0x00,0x3d,0xd7,0x7f,0xfc,0x03,0x00]
+
+v_lshrrev_b64 v[5:6], null, null
+// GFX12: encoding: [0x05,0x00,0x3d,0xd7,0x7c,0xf8,0x00,0x00]
+
+v_lshrrev_b64 v[5:6], -1, -1
+// GFX12: encoding: [0x05,0x00,0x3d,0xd7,0xc1,0x82,0x01,0x00]
+
+v_lshrrev_b64 v[5:6], 0.5, 0xaf123456
+// GFX12: encoding: [0x05,0x00,0x3d,0xd7,0xf0,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+
+v_lshrrev_b64 v[5:6], src_scc, src_scc
+// GFX12: encoding: [0x05,0x00,0x3d,0xd7,0xfd,0xfa,0x01,0x00]
+
+v_lshrrev_b64 v[254:255], 0xaf123456, 0.5
+// GFX12: encoding: [0xfe,0x00,0x3d,0xd7,0xff,0xe0,0x01,0x00,0x56,0x34,0x12,0xaf]
+
+v_mad_i16 v5, v1, v2, s3
+// GFX12: encoding: [0x05,0x00,0x53,0xd6,0x01,0x05,0x0e,0x00]
+
+v_mad_i16 v5, v255, s2, s105
+// GFX12: encoding: [0x05,0x00,0x53,0xd6,0xff,0x05,0xa4,0x01]
+
+v_mad_i16 v5, s1, v255, exec_hi
+// GFX12: encoding: [0x05,0x00,0x53,0xd6,0x01,0xfe,0xff,0x01]
+
+v_mad_i16 v5, s105, s105, exec_lo
+// GFX12: encoding: [0x05,0x00,0x53,0xd6,0x69,0xd2,0xf8,0x01]
+
+v_mad_i16 v5, vcc_lo, ttmp15, v3
+// GFX12: encoding: [0x05,0x00,0x53,0xd6,0x6a,0xf6,0x0c,0x04]
+
+v_mad_i16 v5, vcc_hi, 0xfe0b, v255
+// GFX12: encoding: [0x05,0x00,0x53,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00]
+
+v_mad_i16 v5, ttmp15, src_scc, ttmp15 +// GFX12: encoding: [0x05,0x00,0x53,0xd6,0x7b,0xfa,0xed,0x01] + +v_mad_i16 v5, m0, 0.5, m0 +// GFX12: encoding: [0x05,0x00,0x53,0xd6,0x7d,0xe0,0xf5,0x01] + +v_mad_i16 v5, exec_lo, -1, vcc_hi +// GFX12: encoding: [0x05,0x00,0x53,0xd6,0x7e,0x82,0xad,0x01] + +v_mad_i16 v5, exec_hi, null, vcc_lo op_sel:[1,1,1,1] +// GFX12: encoding: [0x05,0x78,0x53,0xd6,0x7f,0xf8,0xa8,0x01] + +v_mad_i16 v5, null, exec_lo, 0xfe0b op_sel:[0,0,0,0] +// GFX12: encoding: [0x05,0x00,0x53,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00] + +v_mad_i16 v5, -1, exec_hi, src_scc op_sel:[1,0,0,0] +// GFX12: encoding: [0x05,0x08,0x53,0xd6,0xc1,0xfe,0xf4,0x03] + +v_mad_i16 v5, 0.5, m0, 0.5 op_sel:[0,1,0,0] +// GFX12: encoding: [0x05,0x10,0x53,0xd6,0xf0,0xfa,0xc0,0x03] + +v_mad_i16 v5, src_scc, vcc_lo, -1 op_sel:[0,0,1,0] +// GFX12: encoding: [0x05,0x20,0x53,0xd6,0xfd,0xd4,0x04,0x03] + +v_mad_i16 v255, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] clamp +// GFX12: encoding: [0xff,0xc0,0x53,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] + +v_mad_i32_i16 v5, v1, v2, v3 +// GFX12: encoding: [0x05,0x00,0x5a,0xd6,0x01,0x05,0x0e,0x04] + +v_mad_i32_i16 v5, v255, v255, s3 +// GFX12: encoding: [0x05,0x00,0x5a,0xd6,0xff,0xff,0x0f,0x00] + +v_mad_i32_i16 v5, s1, s2, v255 +// GFX12: encoding: [0x05,0x00,0x5a,0xd6,0x01,0x04,0xfc,0x07] + +v_mad_i32_i16 v5, s105, s105, s105 +// GFX12: encoding: [0x05,0x00,0x5a,0xd6,0x69,0xd2,0xa4,0x01] + +v_mad_i32_i16 v5, vcc_lo, ttmp15, vcc_lo +// GFX12: encoding: [0x05,0x00,0x5a,0xd6,0x6a,0xf6,0xa8,0x01] + +v_mad_i32_i16 v5, vcc_hi, 0xfe0b, vcc_hi +// GFX12: encoding: [0x05,0x00,0x5a,0xd6,0x6b,0xfe,0xad,0x01,0x0b,0xfe,0x00,0x00] + +v_mad_i32_i16 v5, ttmp15, src_scc, ttmp15 +// GFX12: encoding: [0x05,0x00,0x5a,0xd6,0x7b,0xfa,0xed,0x01] + +v_mad_i32_i16 v5, m0, 0.5, m0 +// GFX12: encoding: [0x05,0x00,0x5a,0xd6,0x7d,0xe0,0xf5,0x01] + +v_mad_i32_i16 v5, exec_lo, -1, exec_hi +// GFX12: encoding: [0x05,0x00,0x5a,0xd6,0x7e,0x82,0xfd,0x01] + +v_mad_i32_i16 v5, exec_hi, null, exec_lo +// GFX12: encoding: [0x05,0x00,0x5a,0xd6,0x7f,0xf8,0xf8,0x01] + +v_mad_i32_i16 v5, null, exec_lo, null +// GFX12: encoding: [0x05,0x00,0x5a,0xd6,0x7c,0xfc,0xf0,0x01] + +v_mad_i32_i16 v5, -1, exec_hi, 0xaf123456 +// GFX12: encoding: [0x05,0x00,0x5a,0xd6,0xc1,0xfe,0xfc,0x03,0x56,0x34,0x12,0xaf] + +v_mad_i32_i16 v5, 0.5, m0, -1 op_sel:[0,0,0,0] +// GFX12: encoding: [0x05,0x00,0x5a,0xd6,0xf0,0xfa,0x04,0x03] + +v_mad_i32_i16 v5, src_scc, vcc_lo, src_scc op_sel:[1,0,0,0] +// GFX12: encoding: [0x05,0x08,0x5a,0xd6,0xfd,0xd4,0xf4,0x03] + +v_mad_i32_i16 v255, 0xfe0b, vcc_hi, 0.5 op_sel:[0,1,0,0] clamp +// GFX12: encoding: [0xff,0x90,0x5a,0xd6,0xff,0xd6,0xc0,0x03,0x0b,0xfe,0x00,0x00] + +v_mad_i32_i24 v5, v1, v2, s3 +// GFX12: encoding: [0x05,0x00,0x0a,0xd6,0x01,0x05,0x0e,0x00] + +v_mad_i32_i24 v5, v255, s2, s105 +// GFX12: encoding: [0x05,0x00,0x0a,0xd6,0xff,0x05,0xa4,0x01] + +v_mad_i32_i24 v5, s1, v255, exec_hi +// GFX12: encoding: [0x05,0x00,0x0a,0xd6,0x01,0xfe,0xff,0x01] + +v_mad_i32_i24 v5, s105, s105, exec_lo +// GFX12: encoding: [0x05,0x00,0x0a,0xd6,0x69,0xd2,0xf8,0x01] + +v_mad_i32_i24 v5, vcc_lo, ttmp15, v3 +// GFX12: encoding: [0x05,0x00,0x0a,0xd6,0x6a,0xf6,0x0c,0x04] + +v_mad_i32_i24 v5, vcc_hi, 0xaf123456, v255 +// GFX12: encoding: [0x05,0x00,0x0a,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] + +v_mad_i32_i24 v5, ttmp15, src_scc, ttmp15 +// GFX12: encoding: [0x05,0x00,0x0a,0xd6,0x7b,0xfa,0xed,0x01] + +v_mad_i32_i24 v5, m0, 0.5, m0 +// GFX12: encoding: [0x05,0x00,0x0a,0xd6,0x7d,0xe0,0xf5,0x01] + 
+v_mad_i32_i24 v5, exec_lo, -1, vcc_hi +// GFX12: encoding: [0x05,0x00,0x0a,0xd6,0x7e,0x82,0xad,0x01] + +v_mad_i32_i24 v5, exec_hi, null, vcc_lo +// GFX12: encoding: [0x05,0x00,0x0a,0xd6,0x7f,0xf8,0xa8,0x01] + +v_mad_i32_i24 v5, null, exec_lo, 0xaf123456 +// GFX12: encoding: [0x05,0x00,0x0a,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf] + +v_mad_i32_i24 v5, -1, exec_hi, src_scc +// GFX12: encoding: [0x05,0x00,0x0a,0xd6,0xc1,0xfe,0xf4,0x03] + +v_mad_i32_i24 v5, 0.5, m0, 0.5 +// GFX12: encoding: [0x05,0x00,0x0a,0xd6,0xf0,0xfa,0xc0,0x03] + +v_mad_i32_i24 v5, src_scc, vcc_lo, -1 +// GFX12: encoding: [0x05,0x00,0x0a,0xd6,0xfd,0xd4,0x04,0x03] + +v_mad_i32_i24 v255, 0xaf123456, vcc_hi, null clamp +// GFX12: encoding: [0xff,0x80,0x0a,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] + +v_mad_co_i64_i32 v[5:6], s6, s105, s105, s[6:7] +// W32: encoding: [0x05,0x06,0xff,0xd6,0x69,0xd2,0x18,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_mad_co_i64_i32 v[5:6], s6, ttmp15, ttmp15, s[104:105] +// W32: encoding: [0x05,0x06,0xff,0xd6,0x7b,0xf6,0xa0,0x01] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_mad_co_i64_i32 v[5:6], s6, m0, 0.5, ttmp[14:15] +// W32: encoding: [0x05,0x06,0xff,0xd6,0x7d,0xe0,0xe9,0x01] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_mad_co_i64_i32 v[5:6], s6, exec_lo, -1, exec +// W32: encoding: [0x05,0x06,0xff,0xd6,0x7e,0x82,0xf9,0x01] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_mad_co_i64_i32 v[5:6], s6, exec_hi, null, vcc +// W32: encoding: [0x05,0x06,0xff,0xd6,0x7f,0xf8,0xa8,0x01] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_mad_co_i64_i32 v[5:6], s105, null, exec_lo, null +// W32: encoding: [0x05,0x69,0xff,0xd6,0x7c,0xfc,0xf0,0x01] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_mad_co_i64_i32 v[5:6], vcc_lo, -1, exec_hi, -1 +// W32: encoding: [0x05,0x6a,0xff,0xd6,0xc1,0xfe,0x04,0x03] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_mad_co_i64_i32 v[5:6], vcc_hi, 0.5, m0, 0xaf123456 +// W32: encoding: [0x05,0x6b,0xff,0xd6,0xf0,0xfa,0xfc,0x03,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_mad_co_i64_i32 v[5:6], ttmp15, src_scc, vcc_lo, src_scc +// W32: encoding: [0x05,0x7b,0xff,0xd6,0xfd,0xd4,0xf4,0x03] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_mad_co_i64_i32 v[5:6], s[12:13], s105, s105, s[6:7] +// W64: encoding: [0x05,0x0c,0xff,0xd6,0x69,0xd2,0x18,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_mad_co_i64_i32 v[5:6], s[12:13], ttmp15, ttmp15, s[104:105] +// W64: encoding: [0x05,0x0c,0xff,0xd6,0x7b,0xf6,0xa0,0x01] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_mad_co_i64_i32 v[5:6], s[12:13], m0, 0.5, ttmp[14:15] +// W64: encoding: [0x05,0x0c,0xff,0xd6,0x7d,0xe0,0xe9,0x01] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_mad_co_i64_i32 v[5:6], s[12:13], exec_lo, -1, exec +// W64: encoding: [0x05,0x0c,0xff,0xd6,0x7e,0x82,0xf9,0x01] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_mad_co_i64_i32 v[5:6], s[12:13], exec_hi, null, vcc +// W64: encoding: [0x05,0x0c,0xff,0xd6,0x7f,0xf8,0xa8,0x01] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_mad_co_i64_i32 v[5:6], s[12:13], 
null, exec_lo, null +// W64: encoding: [0x05,0x0c,0xff,0xd6,0x7c,0xfc,0xf0,0x01] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_mad_co_i64_i32 v[5:6], s[104:105], -1, exec_hi, -1 +// W64: encoding: [0x05,0x68,0xff,0xd6,0xc1,0xfe,0x04,0x03] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_mad_co_i64_i32 v[5:6], vcc, 0.5, m0, 0xaf123456 +// W64: encoding: [0x05,0x6a,0xff,0xd6,0xf0,0xfa,0xfc,0x03,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_mad_co_i64_i32 v[5:6], ttmp[14:15], src_scc, vcc_lo, src_scc +// W64: encoding: [0x05,0x7a,0xff,0xd6,0xfd,0xd4,0xf4,0x03] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_mad_co_i64_i32 v[254:255], null, 0xaf123456, vcc_hi, 0.5 clamp +// GFX12: encoding: [0xfe,0xfc,0xff,0xd6,0xff,0xd6,0xc0,0x03,0x56,0x34,0x12,0xaf] + +v_mad_u16 v5, v1, v2, s3 +// GFX12: encoding: [0x05,0x00,0x41,0xd6,0x01,0x05,0x0e,0x00] + +v_mad_u16 v5, v255, s2, s105 +// GFX12: encoding: [0x05,0x00,0x41,0xd6,0xff,0x05,0xa4,0x01] + +v_mad_u16 v5, s1, v255, exec_hi +// GFX12: encoding: [0x05,0x00,0x41,0xd6,0x01,0xfe,0xff,0x01] + +v_mad_u16 v5, s105, s105, exec_lo +// GFX12: encoding: [0x05,0x00,0x41,0xd6,0x69,0xd2,0xf8,0x01] + +v_mad_u16 v5, vcc_lo, ttmp15, v3 +// GFX12: encoding: [0x05,0x00,0x41,0xd6,0x6a,0xf6,0x0c,0x04] + +v_mad_u16 v5, vcc_hi, 0xfe0b, v255 +// GFX12: encoding: [0x05,0x00,0x41,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] + +v_mad_u16 v5, ttmp15, src_scc, ttmp15 +// GFX12: encoding: [0x05,0x00,0x41,0xd6,0x7b,0xfa,0xed,0x01] + +v_mad_u16 v5, m0, 0.5, m0 +// GFX12: encoding: [0x05,0x00,0x41,0xd6,0x7d,0xe0,0xf5,0x01] + +v_mad_u16 v5, exec_lo, -1, vcc_hi +// GFX12: encoding: [0x05,0x00,0x41,0xd6,0x7e,0x82,0xad,0x01] + +v_mad_u16 v5, exec_hi, null, vcc_lo op_sel:[1,1,1,1] +// GFX12: encoding: [0x05,0x78,0x41,0xd6,0x7f,0xf8,0xa8,0x01] + +v_mad_u16 v5, null, exec_lo, 0xfe0b op_sel:[0,0,0,0] +// GFX12: encoding: [0x05,0x00,0x41,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00] + +v_mad_u16 v5, -1, exec_hi, src_scc op_sel:[1,0,0,0] +// GFX12: encoding: [0x05,0x08,0x41,0xd6,0xc1,0xfe,0xf4,0x03] + +v_mad_u16 v5, 0.5, m0, 0.5 op_sel:[0,1,0,0] +// GFX12: encoding: [0x05,0x10,0x41,0xd6,0xf0,0xfa,0xc0,0x03] + +v_mad_u16 v5, src_scc, vcc_lo, -1 op_sel:[0,0,1,0] +// GFX12: encoding: [0x05,0x20,0x41,0xd6,0xfd,0xd4,0x04,0x03] + +v_mad_u16 v255, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] clamp +// GFX12: encoding: [0xff,0xc0,0x41,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] + +v_mad_u32_u16 v5, v1, v2, v3 +// GFX12: encoding: [0x05,0x00,0x59,0xd6,0x01,0x05,0x0e,0x04] + +v_mad_u32_u16 v5, v255, v255, s3 +// GFX12: encoding: [0x05,0x00,0x59,0xd6,0xff,0xff,0x0f,0x00] + +v_mad_u32_u16 v5, s1, s2, v255 +// GFX12: encoding: [0x05,0x00,0x59,0xd6,0x01,0x04,0xfc,0x07] + +v_mad_u32_u16 v5, s105, s105, s105 +// GFX12: encoding: [0x05,0x00,0x59,0xd6,0x69,0xd2,0xa4,0x01] + +v_mad_u32_u16 v5, vcc_lo, ttmp15, vcc_lo +// GFX12: encoding: [0x05,0x00,0x59,0xd6,0x6a,0xf6,0xa8,0x01] + +v_mad_u32_u16 v5, vcc_hi, 0xfe0b, vcc_hi +// GFX12: encoding: [0x05,0x00,0x59,0xd6,0x6b,0xfe,0xad,0x01,0x0b,0xfe,0x00,0x00] + +v_mad_u32_u16 v5, ttmp15, src_scc, ttmp15 +// GFX12: encoding: [0x05,0x00,0x59,0xd6,0x7b,0xfa,0xed,0x01] + +v_mad_u32_u16 v5, m0, 0.5, m0 +// GFX12: encoding: [0x05,0x00,0x59,0xd6,0x7d,0xe0,0xf5,0x01] + +v_mad_u32_u16 v5, exec_lo, -1, exec_hi +// GFX12: encoding: [0x05,0x00,0x59,0xd6,0x7e,0x82,0xfd,0x01] + +v_mad_u32_u16 v5, exec_hi, null, exec_lo +// 
GFX12: encoding: [0x05,0x00,0x59,0xd6,0x7f,0xf8,0xf8,0x01] + +v_mad_u32_u16 v5, null, exec_lo, null +// GFX12: encoding: [0x05,0x00,0x59,0xd6,0x7c,0xfc,0xf0,0x01] + +v_mad_u32_u16 v5, -1, exec_hi, 0xaf123456 +// GFX12: encoding: [0x05,0x00,0x59,0xd6,0xc1,0xfe,0xfc,0x03,0x56,0x34,0x12,0xaf] + +v_mad_u32_u16 v5, 0.5, m0, -1 op_sel:[0,0,0,0] +// GFX12: encoding: [0x05,0x00,0x59,0xd6,0xf0,0xfa,0x04,0x03] + +v_mad_u32_u16 v5, src_scc, vcc_lo, src_scc op_sel:[1,0,0,0] +// GFX12: encoding: [0x05,0x08,0x59,0xd6,0xfd,0xd4,0xf4,0x03] + +v_mad_u32_u16 v255, 0xfe0b, vcc_hi, 0.5 op_sel:[0,1,0,0] clamp +// GFX12: encoding: [0xff,0x90,0x59,0xd6,0xff,0xd6,0xc0,0x03,0x0b,0xfe,0x00,0x00] + +v_mad_u32_u24 v5, v1, v2, s3 +// GFX12: encoding: [0x05,0x00,0x0b,0xd6,0x01,0x05,0x0e,0x00] + +v_mad_u32_u24 v5, v255, s2, s105 +// GFX12: encoding: [0x05,0x00,0x0b,0xd6,0xff,0x05,0xa4,0x01] + +v_mad_u32_u24 v5, s1, v255, exec_hi +// GFX12: encoding: [0x05,0x00,0x0b,0xd6,0x01,0xfe,0xff,0x01] + +v_mad_u32_u24 v5, s105, s105, exec_lo +// GFX12: encoding: [0x05,0x00,0x0b,0xd6,0x69,0xd2,0xf8,0x01] + +v_mad_u32_u24 v5, vcc_lo, ttmp15, v3 +// GFX12: encoding: [0x05,0x00,0x0b,0xd6,0x6a,0xf6,0x0c,0x04] + +v_mad_u32_u24 v5, vcc_hi, 0xaf123456, v255 +// GFX12: encoding: [0x05,0x00,0x0b,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] + +v_mad_u32_u24 v5, ttmp15, src_scc, ttmp15 +// GFX12: encoding: [0x05,0x00,0x0b,0xd6,0x7b,0xfa,0xed,0x01] + +v_mad_u32_u24 v5, m0, 0.5, m0 +// GFX12: encoding: [0x05,0x00,0x0b,0xd6,0x7d,0xe0,0xf5,0x01] + +v_mad_u32_u24 v5, exec_lo, -1, vcc_hi +// GFX12: encoding: [0x05,0x00,0x0b,0xd6,0x7e,0x82,0xad,0x01] + +v_mad_u32_u24 v5, exec_hi, null, vcc_lo +// GFX12: encoding: [0x05,0x00,0x0b,0xd6,0x7f,0xf8,0xa8,0x01] + +v_mad_u32_u24 v5, null, exec_lo, 0xaf123456 +// GFX12: encoding: [0x05,0x00,0x0b,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf] + +v_mad_u32_u24 v5, -1, exec_hi, src_scc +// GFX12: encoding: [0x05,0x00,0x0b,0xd6,0xc1,0xfe,0xf4,0x03] + +v_mad_u32_u24 v5, 0.5, m0, 0.5 +// GFX12: encoding: [0x05,0x00,0x0b,0xd6,0xf0,0xfa,0xc0,0x03] + +v_mad_u32_u24 v5, src_scc, vcc_lo, -1 +// GFX12: encoding: [0x05,0x00,0x0b,0xd6,0xfd,0xd4,0x04,0x03] + +v_mad_u32_u24 v255, 0xaf123456, vcc_hi, null clamp +// GFX12: encoding: [0xff,0x80,0x0b,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] + +v_mad_co_u64_u32 v[5:6], s6, s105, s105, s[6:7] +// W32: encoding: [0x05,0x06,0xfe,0xd6,0x69,0xd2,0x18,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_mad_co_u64_u32 v[5:6], s6, ttmp15, ttmp15, s[104:105] +// W32: encoding: [0x05,0x06,0xfe,0xd6,0x7b,0xf6,0xa0,0x01] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_mad_co_u64_u32 v[5:6], s6, m0, 0.5, ttmp[14:15] +// W32: encoding: [0x05,0x06,0xfe,0xd6,0x7d,0xe0,0xe9,0x01] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_mad_co_u64_u32 v[5:6], s6, exec_lo, -1, exec +// W32: encoding: [0x05,0x06,0xfe,0xd6,0x7e,0x82,0xf9,0x01] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_mad_co_u64_u32 v[5:6], s6, exec_hi, null, vcc +// W32: encoding: [0x05,0x06,0xfe,0xd6,0x7f,0xf8,0xa8,0x01] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_mad_co_u64_u32 v[5:6], s105, null, exec_lo, null +// W32: encoding: [0x05,0x69,0xfe,0xd6,0x7c,0xfc,0xf0,0x01] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_mad_co_u64_u32 v[5:6], vcc_lo, -1, exec_hi, -1 +// W32: encoding: 
[0x05,0x6a,0xfe,0xd6,0xc1,0xfe,0x04,0x03] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_mad_co_u64_u32 v[5:6], vcc_hi, 0.5, m0, 0xaf123456 +// W32: encoding: [0x05,0x6b,0xfe,0xd6,0xf0,0xfa,0xfc,0x03,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_mad_co_u64_u32 v[5:6], ttmp15, src_scc, vcc_lo, src_scc +// W32: encoding: [0x05,0x7b,0xfe,0xd6,0xfd,0xd4,0xf4,0x03] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_mad_co_u64_u32 v[5:6], s[12:13], s105, s105, s[6:7] +// W64: encoding: [0x05,0x0c,0xfe,0xd6,0x69,0xd2,0x18,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_mad_co_u64_u32 v[5:6], s[12:13], ttmp15, ttmp15, s[104:105] +// W64: encoding: [0x05,0x0c,0xfe,0xd6,0x7b,0xf6,0xa0,0x01] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_mad_co_u64_u32 v[5:6], s[12:13], m0, 0.5, ttmp[14:15] +// W64: encoding: [0x05,0x0c,0xfe,0xd6,0x7d,0xe0,0xe9,0x01] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_mad_co_u64_u32 v[5:6], s[12:13], exec_lo, -1, exec +// W64: encoding: [0x05,0x0c,0xfe,0xd6,0x7e,0x82,0xf9,0x01] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_mad_co_u64_u32 v[5:6], s[12:13], exec_hi, null, vcc +// W64: encoding: [0x05,0x0c,0xfe,0xd6,0x7f,0xf8,0xa8,0x01] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_mad_co_u64_u32 v[5:6], s[12:13], null, exec_lo, null +// W64: encoding: [0x05,0x0c,0xfe,0xd6,0x7c,0xfc,0xf0,0x01] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_mad_co_u64_u32 v[5:6], s[104:105], -1, exec_hi, -1 +// W64: encoding: [0x05,0x68,0xfe,0xd6,0xc1,0xfe,0x04,0x03] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_mad_co_u64_u32 v[5:6], vcc, 0.5, m0, 0xaf123456 +// W64: encoding: [0x05,0x6a,0xfe,0xd6,0xf0,0xfa,0xfc,0x03,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_mad_co_u64_u32 v[5:6], ttmp[14:15], src_scc, vcc_lo, src_scc +// W64: encoding: [0x05,0x7a,0xfe,0xd6,0xfd,0xd4,0xf4,0x03] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_mad_co_u64_u32 v[254:255], null, 0xaf123456, vcc_hi, 0.5 clamp +// GFX12: encoding: [0xfe,0xfc,0xfe,0xd6,0xff,0xd6,0xc0,0x03,0x56,0x34,0x12,0xaf] + +v_max3_num_f16 v5, v1, v2, s3 +// GFX12: encoding: [0x05,0x00,0x2c,0xd6,0x01,0x05,0x0e,0x00] + +v_max3_num_f16 v5, v255, s2, s105 +// GFX12: encoding: [0x05,0x00,0x2c,0xd6,0xff,0x05,0xa4,0x01] + +v_max3_num_f16 v5, s1, v255, exec_hi +// GFX12: encoding: [0x05,0x00,0x2c,0xd6,0x01,0xfe,0xff,0x01] + +v_max3_num_f16 v5, s105, s105, exec_lo +// GFX12: encoding: [0x05,0x00,0x2c,0xd6,0x69,0xd2,0xf8,0x01] + +v_max3_num_f16 v5, vcc_lo, ttmp15, v3 +// GFX12: encoding: [0x05,0x00,0x2c,0xd6,0x6a,0xf6,0x0c,0x04] + +v_max3_num_f16 v5, vcc_hi, 0xfe0b, v255 +// GFX12: encoding: [0x05,0x00,0x2c,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] + +v_max3_num_f16 v5, -|ttmp15|, -|src_scc|, -|ttmp15| +// GFX12: encoding: [0x05,0x07,0x2c,0xd6,0x7b,0xfa,0xed,0xe1] + +v_max3_num_f16 v5, m0, 0.5, m0 +// GFX12: encoding: [0x05,0x00,0x2c,0xd6,0x7d,0xe0,0xf5,0x01] + +v_max3_num_f16 v5, |exec_lo|, -1, vcc_hi +// GFX12: encoding: [0x05,0x01,0x2c,0xd6,0x7e,0x82,0xad,0x01] + +v_max3_num_f16 v5, -|exec_hi|, null, -|vcc_lo| op_sel:[1,1,1,1] +// GFX12: encoding: 
[0x05,0x7d,0x2c,0xd6,0x7f,0xf8,0xa8,0xa1] + +v_max3_num_f16 v5, null, exec_lo, -|0xfe0b| op_sel:[0,0,0,0] +// GFX12: encoding: [0x05,0x04,0x2c,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00] + +v_max3_num_f16 v5, -1, -|exec_hi|, -|src_scc| op_sel:[1,0,0,0] +// GFX12: encoding: [0x05,0x0e,0x2c,0xd6,0xc1,0xfe,0xf4,0xc3] + +v_max3_num_f16 v5, 0.5, -m0, 0.5 op_sel:[0,1,0,0] +// GFX12: encoding: [0x05,0x10,0x2c,0xd6,0xf0,0xfa,0xc0,0x43] + +v_max3_num_f16 v5, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0] +// GFX12: encoding: [0x05,0x22,0x2c,0xd6,0xfd,0xd4,0x04,0x23] + +v_max3_num_f16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp +// GFX12: encoding: [0xff,0xc3,0x2c,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] + +v_max3_num_f32 v5, v1, v2, s3 +// GFX12: encoding: [0x05,0x00,0x2a,0xd6,0x01,0x05,0x0e,0x00] + +v_max3_num_f32 v5, v255, s2, s105 +// GFX12: encoding: [0x05,0x00,0x2a,0xd6,0xff,0x05,0xa4,0x01] + +v_max3_num_f32 v5, s1, v255, exec_hi +// GFX12: encoding: [0x05,0x00,0x2a,0xd6,0x01,0xfe,0xff,0x01] + +v_max3_num_f32 v5, s105, s105, exec_lo +// GFX12: encoding: [0x05,0x00,0x2a,0xd6,0x69,0xd2,0xf8,0x01] + +v_max3_num_f32 v5, vcc_lo, ttmp15, v3 +// GFX12: encoding: [0x05,0x00,0x2a,0xd6,0x6a,0xf6,0x0c,0x04] + +v_max3_num_f32 v5, vcc_hi, 0xaf123456, v255 +// GFX12: encoding: [0x05,0x00,0x2a,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] + +v_max3_num_f32 v5, -|ttmp15|, -|src_scc|, -|ttmp15| +// GFX12: encoding: [0x05,0x07,0x2a,0xd6,0x7b,0xfa,0xed,0xe1] + +v_max3_num_f32 v5, m0, 0.5, m0 +// GFX12: encoding: [0x05,0x00,0x2a,0xd6,0x7d,0xe0,0xf5,0x01] + +v_max3_num_f32 v5, |exec_lo|, -1, vcc_hi +// GFX12: encoding: [0x05,0x01,0x2a,0xd6,0x7e,0x82,0xad,0x01] + +v_max3_num_f32 v5, -|exec_hi|, null, -|vcc_lo| +// GFX12: encoding: [0x05,0x05,0x2a,0xd6,0x7f,0xf8,0xa8,0xa1] + +v_max3_num_f32 v5, null, exec_lo, -|0xaf123456| +// GFX12: encoding: [0x05,0x04,0x2a,0xd6,0x7c,0xfc,0xfc,0x83,0x56,0x34,0x12,0xaf] + +v_max3_num_f32 v5, -1, -|exec_hi|, -|src_scc| +// GFX12: encoding: [0x05,0x06,0x2a,0xd6,0xc1,0xfe,0xf4,0xc3] + +v_max3_num_f32 v5, 0.5, -m0, 0.5 mul:2 +// GFX12: encoding: [0x05,0x00,0x2a,0xd6,0xf0,0xfa,0xc0,0x4b] + +v_max3_num_f32 v5, -src_scc, |vcc_lo|, -1 mul:4 +// GFX12: encoding: [0x05,0x02,0x2a,0xd6,0xfd,0xd4,0x04,0x33] + +v_max3_num_f32 v255, -|0xaf123456|, -|vcc_hi|, null clamp div:2 +// GFX12: encoding: [0xff,0x83,0x2a,0xd6,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf] + +v_max3_i16 v5, v1, v2, s3 +// GFX12: encoding: [0x05,0x00,0x4d,0xd6,0x01,0x05,0x0e,0x00] + +v_max3_i16 v5, v255, s2, s105 +// GFX12: encoding: [0x05,0x00,0x4d,0xd6,0xff,0x05,0xa4,0x01] + +v_max3_i16 v5, s1, v255, exec_hi +// GFX12: encoding: [0x05,0x00,0x4d,0xd6,0x01,0xfe,0xff,0x01] + +v_max3_i16 v5, s105, s105, exec_lo +// GFX12: encoding: [0x05,0x00,0x4d,0xd6,0x69,0xd2,0xf8,0x01] + +v_max3_i16 v5, vcc_lo, ttmp15, v3 +// GFX12: encoding: [0x05,0x00,0x4d,0xd6,0x6a,0xf6,0x0c,0x04] + +v_max3_i16 v5, vcc_hi, 0xfe0b, v255 +// GFX12: encoding: [0x05,0x00,0x4d,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] + +v_max3_i16 v5, ttmp15, src_scc, ttmp15 +// GFX12: encoding: [0x05,0x00,0x4d,0xd6,0x7b,0xfa,0xed,0x01] + +v_max3_i16 v5, m0, 0.5, m0 +// GFX12: encoding: [0x05,0x00,0x4d,0xd6,0x7d,0xe0,0xf5,0x01] + +v_max3_i16 v5, exec_lo, -1, vcc_hi +// GFX12: encoding: [0x05,0x00,0x4d,0xd6,0x7e,0x82,0xad,0x01] + +v_max3_i16 v5, exec_hi, null, vcc_lo op_sel:[1,1,1,1] +// GFX12: encoding: [0x05,0x78,0x4d,0xd6,0x7f,0xf8,0xa8,0x01] + +v_max3_i16 v5, null, exec_lo, 0xfe0b op_sel:[0,0,0,0] +// GFX12: encoding: 
[0x05,0x00,0x4d,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00] + +v_max3_i16 v5, -1, exec_hi, src_scc op_sel:[1,0,0,0] +// GFX12: encoding: [0x05,0x08,0x4d,0xd6,0xc1,0xfe,0xf4,0x03] + +v_max3_i16 v5, 0.5, m0, 0.5 op_sel:[0,1,0,0] +// GFX12: encoding: [0x05,0x10,0x4d,0xd6,0xf0,0xfa,0xc0,0x03] + +v_max3_i16 v5, src_scc, vcc_lo, -1 op_sel:[0,0,1,0] +// GFX12: encoding: [0x05,0x20,0x4d,0xd6,0xfd,0xd4,0x04,0x03] + +v_max3_i16 v255, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] +// GFX12: encoding: [0xff,0x40,0x4d,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] + +v_max3_i32 v5, v1, v2, s3 +// GFX12: encoding: [0x05,0x00,0x1d,0xd6,0x01,0x05,0x0e,0x00] + +v_max3_i32 v5, v255, s2, s105 +// GFX12: encoding: [0x05,0x00,0x1d,0xd6,0xff,0x05,0xa4,0x01] + +v_max3_i32 v5, s1, v255, exec_hi +// GFX12: encoding: [0x05,0x00,0x1d,0xd6,0x01,0xfe,0xff,0x01] + +v_max3_i32 v5, s105, s105, exec_lo +// GFX12: encoding: [0x05,0x00,0x1d,0xd6,0x69,0xd2,0xf8,0x01] + +v_max3_i32 v5, vcc_lo, ttmp15, v3 +// GFX12: encoding: [0x05,0x00,0x1d,0xd6,0x6a,0xf6,0x0c,0x04] + +v_max3_i32 v5, vcc_hi, 0xaf123456, v255 +// GFX12: encoding: [0x05,0x00,0x1d,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] + +v_max3_i32 v5, ttmp15, src_scc, ttmp15 +// GFX12: encoding: [0x05,0x00,0x1d,0xd6,0x7b,0xfa,0xed,0x01] + +v_max3_i32 v5, m0, 0.5, m0 +// GFX12: encoding: [0x05,0x00,0x1d,0xd6,0x7d,0xe0,0xf5,0x01] + +v_max3_i32 v5, exec_lo, -1, vcc_hi +// GFX12: encoding: [0x05,0x00,0x1d,0xd6,0x7e,0x82,0xad,0x01] + +v_max3_i32 v5, exec_hi, null, vcc_lo +// GFX12: encoding: [0x05,0x00,0x1d,0xd6,0x7f,0xf8,0xa8,0x01] + +v_max3_i32 v5, null, exec_lo, 0xaf123456 +// GFX12: encoding: [0x05,0x00,0x1d,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf] + +v_max3_i32 v5, -1, exec_hi, src_scc +// GFX12: encoding: [0x05,0x00,0x1d,0xd6,0xc1,0xfe,0xf4,0x03] + +v_max3_i32 v5, 0.5, m0, 0.5 +// GFX12: encoding: [0x05,0x00,0x1d,0xd6,0xf0,0xfa,0xc0,0x03] + +v_max3_i32 v5, src_scc, vcc_lo, -1 +// GFX12: encoding: [0x05,0x00,0x1d,0xd6,0xfd,0xd4,0x04,0x03] + +v_max3_i32 v255, 0xaf123456, vcc_hi, null +// GFX12: encoding: [0xff,0x00,0x1d,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] + +v_max3_u16 v5, v1, v2, s3 +// GFX12: encoding: [0x05,0x00,0x4e,0xd6,0x01,0x05,0x0e,0x00] + +v_max3_u16 v5, v255, s2, s105 +// GFX12: encoding: [0x05,0x00,0x4e,0xd6,0xff,0x05,0xa4,0x01] + +v_max3_u16 v5, s1, v255, exec_hi +// GFX12: encoding: [0x05,0x00,0x4e,0xd6,0x01,0xfe,0xff,0x01] + +v_max3_u16 v5, s105, s105, exec_lo +// GFX12: encoding: [0x05,0x00,0x4e,0xd6,0x69,0xd2,0xf8,0x01] + +v_max3_u16 v5, vcc_lo, ttmp15, v3 +// GFX12: encoding: [0x05,0x00,0x4e,0xd6,0x6a,0xf6,0x0c,0x04] + +v_max3_u16 v5, vcc_hi, 0xfe0b, v255 +// GFX12: encoding: [0x05,0x00,0x4e,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] + +v_max3_u16 v5, ttmp15, src_scc, ttmp15 +// GFX12: encoding: [0x05,0x00,0x4e,0xd6,0x7b,0xfa,0xed,0x01] + +v_max3_u16 v5, m0, 0.5, m0 +// GFX12: encoding: [0x05,0x00,0x4e,0xd6,0x7d,0xe0,0xf5,0x01] + +v_max3_u16 v5, exec_lo, -1, vcc_hi +// GFX12: encoding: [0x05,0x00,0x4e,0xd6,0x7e,0x82,0xad,0x01] + +v_max3_u16 v5, exec_hi, null, vcc_lo op_sel:[1,1,1,1] +// GFX12: encoding: [0x05,0x78,0x4e,0xd6,0x7f,0xf8,0xa8,0x01] + +v_max3_u16 v5, null, exec_lo, 0xfe0b op_sel:[0,0,0,0] +// GFX12: encoding: [0x05,0x00,0x4e,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00] + +v_max3_u16 v5, -1, exec_hi, src_scc op_sel:[1,0,0,0] +// GFX12: encoding: [0x05,0x08,0x4e,0xd6,0xc1,0xfe,0xf4,0x03] + +v_max3_u16 v5, 0.5, m0, 0.5 op_sel:[0,1,0,0] +// GFX12: encoding: [0x05,0x10,0x4e,0xd6,0xf0,0xfa,0xc0,0x03] + +v_max3_u16 v5, src_scc, vcc_lo, 
-1 op_sel:[0,0,1,0] +// GFX12: encoding: [0x05,0x20,0x4e,0xd6,0xfd,0xd4,0x04,0x03] + +v_max3_u16 v255, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] +// GFX12: encoding: [0xff,0x40,0x4e,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] + +v_max3_u32 v5, v1, v2, s3 +// GFX12: encoding: [0x05,0x00,0x1e,0xd6,0x01,0x05,0x0e,0x00] + +v_max3_u32 v5, v255, s2, s105 +// GFX12: encoding: [0x05,0x00,0x1e,0xd6,0xff,0x05,0xa4,0x01] + +v_max3_u32 v5, s1, v255, exec_hi +// GFX12: encoding: [0x05,0x00,0x1e,0xd6,0x01,0xfe,0xff,0x01] + +v_max3_u32 v5, s105, s105, exec_lo +// GFX12: encoding: [0x05,0x00,0x1e,0xd6,0x69,0xd2,0xf8,0x01] + +v_max3_u32 v5, vcc_lo, ttmp15, v3 +// GFX12: encoding: [0x05,0x00,0x1e,0xd6,0x6a,0xf6,0x0c,0x04] + +v_max3_u32 v5, vcc_hi, 0xaf123456, v255 +// GFX12: encoding: [0x05,0x00,0x1e,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] + +v_max3_u32 v5, ttmp15, src_scc, ttmp15 +// GFX12: encoding: [0x05,0x00,0x1e,0xd6,0x7b,0xfa,0xed,0x01] + +v_max3_u32 v5, m0, 0.5, m0 +// GFX12: encoding: [0x05,0x00,0x1e,0xd6,0x7d,0xe0,0xf5,0x01] + +v_max3_u32 v5, exec_lo, -1, vcc_hi +// GFX12: encoding: [0x05,0x00,0x1e,0xd6,0x7e,0x82,0xad,0x01] + +v_max3_u32 v5, exec_hi, null, vcc_lo +// GFX12: encoding: [0x05,0x00,0x1e,0xd6,0x7f,0xf8,0xa8,0x01] + +v_max3_u32 v5, null, exec_lo, 0xaf123456 +// GFX12: encoding: [0x05,0x00,0x1e,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf] + +v_max3_u32 v5, -1, exec_hi, src_scc +// GFX12: encoding: [0x05,0x00,0x1e,0xd6,0xc1,0xfe,0xf4,0x03] + +v_max3_u32 v5, 0.5, m0, 0.5 +// GFX12: encoding: [0x05,0x00,0x1e,0xd6,0xf0,0xfa,0xc0,0x03] + +v_max3_u32 v5, src_scc, vcc_lo, -1 +// GFX12: encoding: [0x05,0x00,0x1e,0xd6,0xfd,0xd4,0x04,0x03] + +v_max3_u32 v255, 0xaf123456, vcc_hi, null +// GFX12: encoding: [0xff,0x00,0x1e,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] + +v_max_i16 v5, v1, v2 +// GFX12: encoding: [0x05,0x00,0x0a,0xd7,0x01,0x05,0x02,0x00] + +v_max_i16 v5, v255, v255 +// GFX12: encoding: [0x05,0x00,0x0a,0xd7,0xff,0xff,0x03,0x00] + +v_max_i16 v5, s1, s2 +// GFX12: encoding: [0x05,0x00,0x0a,0xd7,0x01,0x04,0x00,0x00] + +v_max_i16 v5, s105, s105 +// GFX12: encoding: [0x05,0x00,0x0a,0xd7,0x69,0xd2,0x00,0x00] + +v_max_i16 v5, vcc_lo, ttmp15 +// GFX12: encoding: [0x05,0x00,0x0a,0xd7,0x6a,0xf6,0x00,0x00] + +v_max_i16 v5, vcc_hi, 0xfe0b +// GFX12: encoding: [0x05,0x00,0x0a,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] + +v_max_i16 v5, ttmp15, src_scc +// GFX12: encoding: [0x05,0x00,0x0a,0xd7,0x7b,0xfa,0x01,0x00] + +v_max_i16 v5, m0, 0.5 +// GFX12: encoding: [0x05,0x00,0x0a,0xd7,0x7d,0xe0,0x01,0x00] + +v_max_i16 v5, exec_lo, -1 +// GFX12: encoding: [0x05,0x00,0x0a,0xd7,0x7e,0x82,0x01,0x00] + +v_max_i16 v5, exec_hi, null +// GFX12: encoding: [0x05,0x00,0x0a,0xd7,0x7f,0xf8,0x00,0x00] + +v_max_i16 v5, null, exec_lo +// GFX12: encoding: [0x05,0x00,0x0a,0xd7,0x7c,0xfc,0x00,0x00] + +v_max_i16 v5, -1, exec_hi +// GFX12: encoding: [0x05,0x00,0x0a,0xd7,0xc1,0xfe,0x00,0x00] + +v_max_i16 v5, 0.5, m0 +// GFX12: encoding: [0x05,0x00,0x0a,0xd7,0xf0,0xfa,0x00,0x00] + +v_max_i16 v5, src_scc, vcc_lo +// GFX12: encoding: [0x05,0x00,0x0a,0xd7,0xfd,0xd4,0x00,0x00] + +v_max_i16 v255, 0xfe0b, vcc_hi +// GFX12: encoding: [0xff,0x00,0x0a,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] + +v_max_u16 v5, v1, v2 +// GFX12: encoding: [0x05,0x00,0x09,0xd7,0x01,0x05,0x02,0x00] + +v_max_u16 v5, v255, v255 +// GFX12: encoding: [0x05,0x00,0x09,0xd7,0xff,0xff,0x03,0x00] + +v_max_u16 v5, s1, s2 +// GFX12: encoding: [0x05,0x00,0x09,0xd7,0x01,0x04,0x00,0x00] + +v_max_u16 v5, s105, s105 +// GFX12: encoding: 
[0x05,0x00,0x09,0xd7,0x69,0xd2,0x00,0x00] + +v_max_u16 v5, vcc_lo, ttmp15 +// GFX12: encoding: [0x05,0x00,0x09,0xd7,0x6a,0xf6,0x00,0x00] + +v_max_u16 v5, vcc_hi, 0xfe0b +// GFX12: encoding: [0x05,0x00,0x09,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] + +v_max_u16 v5, ttmp15, src_scc +// GFX12: encoding: [0x05,0x00,0x09,0xd7,0x7b,0xfa,0x01,0x00] + +v_max_u16 v5, m0, 0.5 +// GFX12: encoding: [0x05,0x00,0x09,0xd7,0x7d,0xe0,0x01,0x00] + +v_max_u16 v5, exec_lo, -1 +// GFX12: encoding: [0x05,0x00,0x09,0xd7,0x7e,0x82,0x01,0x00] + +v_max_u16 v5, exec_hi, null +// GFX12: encoding: [0x05,0x00,0x09,0xd7,0x7f,0xf8,0x00,0x00] + +v_max_u16 v5, null, exec_lo +// GFX12: encoding: [0x05,0x00,0x09,0xd7,0x7c,0xfc,0x00,0x00] + +v_max_u16 v5, -1, exec_hi +// GFX12: encoding: [0x05,0x00,0x09,0xd7,0xc1,0xfe,0x00,0x00] + +v_max_u16 v5, 0.5, m0 +// GFX12: encoding: [0x05,0x00,0x09,0xd7,0xf0,0xfa,0x00,0x00] + +v_max_u16 v5, src_scc, vcc_lo +// GFX12: encoding: [0x05,0x00,0x09,0xd7,0xfd,0xd4,0x00,0x00] + +v_max_u16 v255, 0xfe0b, vcc_hi +// GFX12: encoding: [0xff,0x00,0x09,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] + +v_maxmin_num_f16 v5, v1, v2, s3 +// GFX12: encoding: [0x05,0x00,0x6b,0xd6,0x01,0x05,0x0e,0x00] + +v_maxmin_num_f16 v5, v255, s2, s105 +// GFX12: encoding: [0x05,0x00,0x6b,0xd6,0xff,0x05,0xa4,0x01] + +v_maxmin_num_f16 v5, s1, v255, exec_hi +// GFX12: encoding: [0x05,0x00,0x6b,0xd6,0x01,0xfe,0xff,0x01] + +v_maxmin_num_f16 v5, s105, s105, exec_lo +// GFX12: encoding: [0x05,0x00,0x6b,0xd6,0x69,0xd2,0xf8,0x01] + +v_maxmin_num_f16 v5, vcc_lo, ttmp15, v3 +// GFX12: encoding: [0x05,0x00,0x6b,0xd6,0x6a,0xf6,0x0c,0x04] + +v_maxmin_num_f16 v5, vcc_hi, 0xfe0b, v255 +// GFX12: encoding: [0x05,0x00,0x6b,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] + +v_maxmin_num_f16 v5, -|ttmp15|, -|src_scc|, -|ttmp15| +// GFX12: encoding: [0x05,0x07,0x6b,0xd6,0x7b,0xfa,0xed,0xe1] + +v_maxmin_num_f16 v5, m0, 0.5, m0 +// GFX12: encoding: [0x05,0x00,0x6b,0xd6,0x7d,0xe0,0xf5,0x01] + +v_maxmin_num_f16 v5, |exec_lo|, -1, vcc_hi +// GFX12: encoding: [0x05,0x01,0x6b,0xd6,0x7e,0x82,0xad,0x01] + +v_maxmin_num_f16 v5, -|exec_hi|, null, -|vcc_lo| +// GFX12: encoding: [0x05,0x05,0x6b,0xd6,0x7f,0xf8,0xa8,0xa1] + +v_maxmin_num_f16 v5, null, exec_lo, -|0xfe0b| +// GFX12: encoding: [0x05,0x04,0x6b,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00] + +v_maxmin_num_f16 v5, -1, -|exec_hi|, -|src_scc| +// GFX12: encoding: [0x05,0x06,0x6b,0xd6,0xc1,0xfe,0xf4,0xc3] + +v_maxmin_num_f16 v5, 0.5, -m0, 0.5 mul:2 +// GFX12: encoding: [0x05,0x00,0x6b,0xd6,0xf0,0xfa,0xc0,0x4b] + +v_maxmin_num_f16 v5, -src_scc, |vcc_lo|, -1 mul:4 +// GFX12: encoding: [0x05,0x02,0x6b,0xd6,0xfd,0xd4,0x04,0x33] + +v_maxmin_num_f16 v255, -|0xfe0b|, -|vcc_hi|, null clamp div:2 +// GFX12: encoding: [0xff,0x83,0x6b,0xd6,0xff,0xd6,0xf0,0x79,0x0b,0xfe,0x00,0x00] + +v_maxmin_num_f32 v5, v1, v2, s3 +// GFX12: encoding: [0x05,0x00,0x69,0xd6,0x01,0x05,0x0e,0x00] + +v_maxmin_num_f32 v5, v255, s2, s105 +// GFX12: encoding: [0x05,0x00,0x69,0xd6,0xff,0x05,0xa4,0x01] + +v_maxmin_num_f32 v5, s1, v255, exec_hi +// GFX12: encoding: [0x05,0x00,0x69,0xd6,0x01,0xfe,0xff,0x01] + +v_maxmin_num_f32 v5, s105, s105, exec_lo +// GFX12: encoding: [0x05,0x00,0x69,0xd6,0x69,0xd2,0xf8,0x01] + +v_maxmin_num_f32 v5, vcc_lo, ttmp15, v3 +// GFX12: encoding: [0x05,0x00,0x69,0xd6,0x6a,0xf6,0x0c,0x04] + +v_maxmin_num_f32 v5, vcc_hi, 0xaf123456, v255 +// GFX12: encoding: [0x05,0x00,0x69,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] + +v_maxmin_num_f32 v5, -|ttmp15|, -|src_scc|, -|ttmp15| +// GFX12: encoding: 
[0x05,0x07,0x69,0xd6,0x7b,0xfa,0xed,0xe1] + +v_maxmin_num_f32 v5, m0, 0.5, m0 +// GFX12: encoding: [0x05,0x00,0x69,0xd6,0x7d,0xe0,0xf5,0x01] + +v_maxmin_num_f32 v5, |exec_lo|, -1, vcc_hi +// GFX12: encoding: [0x05,0x01,0x69,0xd6,0x7e,0x82,0xad,0x01] + +v_maxmin_num_f32 v5, -|exec_hi|, null, -|vcc_lo| +// GFX12: encoding: [0x05,0x05,0x69,0xd6,0x7f,0xf8,0xa8,0xa1] + +v_maxmin_num_f32 v5, null, exec_lo, -|0xaf123456| +// GFX12: encoding: [0x05,0x04,0x69,0xd6,0x7c,0xfc,0xfc,0x83,0x56,0x34,0x12,0xaf] + +v_maxmin_num_f32 v5, -1, -|exec_hi|, -|src_scc| +// GFX12: encoding: [0x05,0x06,0x69,0xd6,0xc1,0xfe,0xf4,0xc3] + +v_maxmin_num_f32 v5, 0.5, -m0, 0.5 mul:2 +// GFX12: encoding: [0x05,0x00,0x69,0xd6,0xf0,0xfa,0xc0,0x4b] + +v_maxmin_num_f32 v5, -src_scc, |vcc_lo|, -1 mul:4 +// GFX12: encoding: [0x05,0x02,0x69,0xd6,0xfd,0xd4,0x04,0x33] + +v_maxmin_num_f32 v255, -|0xaf123456|, -|vcc_hi|, null clamp div:2 +// GFX12: encoding: [0xff,0x83,0x69,0xd6,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf] + +v_maxmin_i32 v5, v1, v2, s3 +// GFX12: encoding: [0x05,0x00,0x64,0xd6,0x01,0x05,0x0e,0x00] + +v_maxmin_i32 v5, v255, s2, s105 +// GFX12: encoding: [0x05,0x00,0x64,0xd6,0xff,0x05,0xa4,0x01] + +v_maxmin_i32 v5, s1, v255, exec_hi +// GFX12: encoding: [0x05,0x00,0x64,0xd6,0x01,0xfe,0xff,0x01] + +v_maxmin_i32 v5, s105, s105, exec_lo +// GFX12: encoding: [0x05,0x00,0x64,0xd6,0x69,0xd2,0xf8,0x01] + +v_maxmin_i32 v5, vcc_lo, ttmp15, v3 +// GFX12: encoding: [0x05,0x00,0x64,0xd6,0x6a,0xf6,0x0c,0x04] + +v_maxmin_i32 v5, vcc_hi, 0xaf123456, v255 +// GFX12: encoding: [0x05,0x00,0x64,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] + +v_maxmin_i32 v5, ttmp15, src_scc, ttmp15 +// GFX12: encoding: [0x05,0x00,0x64,0xd6,0x7b,0xfa,0xed,0x01] + +v_maxmin_i32 v5, m0, 0.5, m0 +// GFX12: encoding: [0x05,0x00,0x64,0xd6,0x7d,0xe0,0xf5,0x01] + +v_maxmin_i32 v5, exec_lo, -1, vcc_hi +// GFX12: encoding: [0x05,0x00,0x64,0xd6,0x7e,0x82,0xad,0x01] + +v_maxmin_i32 v5, exec_hi, null, vcc_lo +// GFX12: encoding: [0x05,0x00,0x64,0xd6,0x7f,0xf8,0xa8,0x01] + +v_maxmin_i32 v5, null, exec_lo, 0xaf123456 +// GFX12: encoding: [0x05,0x00,0x64,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf] + +v_maxmin_i32 v5, -1, exec_hi, src_scc +// GFX12: encoding: [0x05,0x00,0x64,0xd6,0xc1,0xfe,0xf4,0x03] + +v_maxmin_i32 v5, 0.5, m0, 0.5 +// GFX12: encoding: [0x05,0x00,0x64,0xd6,0xf0,0xfa,0xc0,0x03] + +v_maxmin_i32 v5, src_scc, vcc_lo, -1 +// GFX12: encoding: [0x05,0x00,0x64,0xd6,0xfd,0xd4,0x04,0x03] + +v_maxmin_i32 v255, 0xaf123456, vcc_hi, null +// GFX12: encoding: [0xff,0x00,0x64,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] + +v_maxmin_u32 v5, v1, v2, s3 +// GFX12: encoding: [0x05,0x00,0x62,0xd6,0x01,0x05,0x0e,0x00] + +v_maxmin_u32 v5, v255, s2, s105 +// GFX12: encoding: [0x05,0x00,0x62,0xd6,0xff,0x05,0xa4,0x01] + +v_maxmin_u32 v5, s1, v255, exec_hi +// GFX12: encoding: [0x05,0x00,0x62,0xd6,0x01,0xfe,0xff,0x01] + +v_maxmin_u32 v5, s105, s105, exec_lo +// GFX12: encoding: [0x05,0x00,0x62,0xd6,0x69,0xd2,0xf8,0x01] + +v_maxmin_u32 v5, vcc_lo, ttmp15, v3 +// GFX12: encoding: [0x05,0x00,0x62,0xd6,0x6a,0xf6,0x0c,0x04] + +v_maxmin_u32 v5, vcc_hi, 0xaf123456, v255 +// GFX12: encoding: [0x05,0x00,0x62,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] + +v_maxmin_u32 v5, ttmp15, src_scc, ttmp15 +// GFX12: encoding: [0x05,0x00,0x62,0xd6,0x7b,0xfa,0xed,0x01] + +v_maxmin_u32 v5, m0, 0.5, m0 +// GFX12: encoding: [0x05,0x00,0x62,0xd6,0x7d,0xe0,0xf5,0x01] + +v_maxmin_u32 v5, exec_lo, -1, vcc_hi +// GFX12: encoding: [0x05,0x00,0x62,0xd6,0x7e,0x82,0xad,0x01] + +v_maxmin_u32 v5, exec_hi, null, 
vcc_lo +// GFX12: encoding: [0x05,0x00,0x62,0xd6,0x7f,0xf8,0xa8,0x01] + +v_maxmin_u32 v5, null, exec_lo, 0xaf123456 +// GFX12: encoding: [0x05,0x00,0x62,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf] + +v_maxmin_u32 v5, -1, exec_hi, src_scc +// GFX12: encoding: [0x05,0x00,0x62,0xd6,0xc1,0xfe,0xf4,0x03] + +v_maxmin_u32 v5, 0.5, m0, 0.5 +// GFX12: encoding: [0x05,0x00,0x62,0xd6,0xf0,0xfa,0xc0,0x03] + +v_maxmin_u32 v5, src_scc, vcc_lo, -1 +// GFX12: encoding: [0x05,0x00,0x62,0xd6,0xfd,0xd4,0x04,0x03] + +v_maxmin_u32 v255, 0xaf123456, vcc_hi, null +// GFX12: encoding: [0xff,0x00,0x62,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] + +v_mbcnt_hi_u32_b32 v5, v1, v2 +// GFX12: encoding: [0x05,0x00,0x20,0xd7,0x01,0x05,0x02,0x00] + +v_mbcnt_hi_u32_b32 v5, v255, v255 +// GFX12: encoding: [0x05,0x00,0x20,0xd7,0xff,0xff,0x03,0x00] + +v_mbcnt_hi_u32_b32 v5, s1, s2 +// GFX12: encoding: [0x05,0x00,0x20,0xd7,0x01,0x04,0x00,0x00] + +v_mbcnt_hi_u32_b32 v5, s105, s105 +// GFX12: encoding: [0x05,0x00,0x20,0xd7,0x69,0xd2,0x00,0x00] + +v_mbcnt_hi_u32_b32 v5, vcc_lo, ttmp15 +// GFX12: encoding: [0x05,0x00,0x20,0xd7,0x6a,0xf6,0x00,0x00] + +v_mbcnt_hi_u32_b32 v5, vcc_hi, 0xaf123456 +// GFX12: encoding: [0x05,0x00,0x20,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] + +v_mbcnt_hi_u32_b32 v5, ttmp15, src_scc +// GFX12: encoding: [0x05,0x00,0x20,0xd7,0x7b,0xfa,0x01,0x00] + +v_mbcnt_hi_u32_b32 v5, m0, 0.5 +// GFX12: encoding: [0x05,0x00,0x20,0xd7,0x7d,0xe0,0x01,0x00] + +v_mbcnt_hi_u32_b32 v5, exec_lo, -1 +// GFX12: encoding: [0x05,0x00,0x20,0xd7,0x7e,0x82,0x01,0x00] + +v_mbcnt_hi_u32_b32 v5, exec_hi, null +// GFX12: encoding: [0x05,0x00,0x20,0xd7,0x7f,0xf8,0x00,0x00] + +v_mbcnt_hi_u32_b32 v5, null, exec_lo +// GFX12: encoding: [0x05,0x00,0x20,0xd7,0x7c,0xfc,0x00,0x00] + +v_mbcnt_hi_u32_b32 v5, -1, exec_hi +// GFX12: encoding: [0x05,0x00,0x20,0xd7,0xc1,0xfe,0x00,0x00] + +v_mbcnt_hi_u32_b32 v5, 0.5, m0 +// GFX12: encoding: [0x05,0x00,0x20,0xd7,0xf0,0xfa,0x00,0x00] + +v_mbcnt_hi_u32_b32 v5, src_scc, vcc_lo +// GFX12: encoding: [0x05,0x00,0x20,0xd7,0xfd,0xd4,0x00,0x00] + +v_mbcnt_hi_u32_b32 v255, 0xaf123456, vcc_hi +// GFX12: encoding: [0xff,0x00,0x20,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] + +v_mbcnt_lo_u32_b32 v5, v1, v2 +// GFX12: encoding: [0x05,0x00,0x1f,0xd7,0x01,0x05,0x02,0x00] + +v_mbcnt_lo_u32_b32 v5, v255, v255 +// GFX12: encoding: [0x05,0x00,0x1f,0xd7,0xff,0xff,0x03,0x00] + +v_mbcnt_lo_u32_b32 v5, s1, s2 +// GFX12: encoding: [0x05,0x00,0x1f,0xd7,0x01,0x04,0x00,0x00] + +v_mbcnt_lo_u32_b32 v5, s105, s105 +// GFX12: encoding: [0x05,0x00,0x1f,0xd7,0x69,0xd2,0x00,0x00] + +v_mbcnt_lo_u32_b32 v5, vcc_lo, ttmp15 +// GFX12: encoding: [0x05,0x00,0x1f,0xd7,0x6a,0xf6,0x00,0x00] + +v_mbcnt_lo_u32_b32 v5, vcc_hi, 0xaf123456 +// GFX12: encoding: [0x05,0x00,0x1f,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] + +v_mbcnt_lo_u32_b32 v5, ttmp15, src_scc +// GFX12: encoding: [0x05,0x00,0x1f,0xd7,0x7b,0xfa,0x01,0x00] + +v_mbcnt_lo_u32_b32 v5, m0, 0.5 +// GFX12: encoding: [0x05,0x00,0x1f,0xd7,0x7d,0xe0,0x01,0x00] + +v_mbcnt_lo_u32_b32 v5, exec_lo, -1 +// GFX12: encoding: [0x05,0x00,0x1f,0xd7,0x7e,0x82,0x01,0x00] + +v_mbcnt_lo_u32_b32 v5, exec_hi, null +// GFX12: encoding: [0x05,0x00,0x1f,0xd7,0x7f,0xf8,0x00,0x00] + +v_mbcnt_lo_u32_b32 v5, null, exec_lo +// GFX12: encoding: [0x05,0x00,0x1f,0xd7,0x7c,0xfc,0x00,0x00] + +v_mbcnt_lo_u32_b32 v5, -1, exec_hi +// GFX12: encoding: [0x05,0x00,0x1f,0xd7,0xc1,0xfe,0x00,0x00] + +v_mbcnt_lo_u32_b32 v5, 0.5, m0 +// GFX12: encoding: [0x05,0x00,0x1f,0xd7,0xf0,0xfa,0x00,0x00] + +v_mbcnt_lo_u32_b32 v5, 
src_scc, vcc_lo +// GFX12: encoding: [0x05,0x00,0x1f,0xd7,0xfd,0xd4,0x00,0x00] + +v_mbcnt_lo_u32_b32 v255, 0xaf123456, vcc_hi +// GFX12: encoding: [0xff,0x00,0x1f,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] + +v_med3_num_f16 v5, v1, v2, s3 +// GFX12: encoding: [0x05,0x00,0x32,0xd6,0x01,0x05,0x0e,0x00] + +v_med3_num_f16 v5, v255, s2, s105 +// GFX12: encoding: [0x05,0x00,0x32,0xd6,0xff,0x05,0xa4,0x01] + +v_med3_num_f16 v5, s1, v255, exec_hi +// GFX12: encoding: [0x05,0x00,0x32,0xd6,0x01,0xfe,0xff,0x01] + +v_med3_num_f16 v5, s105, s105, exec_lo +// GFX12: encoding: [0x05,0x00,0x32,0xd6,0x69,0xd2,0xf8,0x01] + +v_med3_num_f16 v5, vcc_lo, ttmp15, v3 +// GFX12: encoding: [0x05,0x00,0x32,0xd6,0x6a,0xf6,0x0c,0x04] + +v_med3_num_f16 v5, vcc_hi, 0xfe0b, v255 +// GFX12: encoding: [0x05,0x00,0x32,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] + +v_med3_num_f16 v5, -|ttmp15|, -|src_scc|, -|ttmp15| +// GFX12: encoding: [0x05,0x07,0x32,0xd6,0x7b,0xfa,0xed,0xe1] + +v_med3_num_f16 v5, m0, 0.5, m0 +// GFX12: encoding: [0x05,0x00,0x32,0xd6,0x7d,0xe0,0xf5,0x01] + +v_med3_num_f16 v5, |exec_lo|, -1, vcc_hi +// GFX12: encoding: [0x05,0x01,0x32,0xd6,0x7e,0x82,0xad,0x01] + +v_med3_num_f16 v5, -|exec_hi|, null, -|vcc_lo| op_sel:[1,1,1,1] +// GFX12: encoding: [0x05,0x7d,0x32,0xd6,0x7f,0xf8,0xa8,0xa1] + +v_med3_num_f16 v5, null, exec_lo, -|0xfe0b| op_sel:[0,0,0,0] +// GFX12: encoding: [0x05,0x04,0x32,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00] + +v_med3_num_f16 v5, -1, -|exec_hi|, -|src_scc| op_sel:[1,0,0,0] +// GFX12: encoding: [0x05,0x0e,0x32,0xd6,0xc1,0xfe,0xf4,0xc3] + +v_med3_num_f16 v5, 0.5, -m0, 0.5 op_sel:[0,1,0,0] +// GFX12: encoding: [0x05,0x10,0x32,0xd6,0xf0,0xfa,0xc0,0x43] + +v_med3_num_f16 v5, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0] +// GFX12: encoding: [0x05,0x22,0x32,0xd6,0xfd,0xd4,0x04,0x23] + +v_med3_num_f16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp +// GFX12: encoding: [0xff,0xc3,0x32,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] + +v_med3_num_f32 v5, v1, v2, s3 +// GFX12: encoding: [0x05,0x00,0x31,0xd6,0x01,0x05,0x0e,0x00] + +v_med3_num_f32 v5, v255, s2, s105 +// GFX12: encoding: [0x05,0x00,0x31,0xd6,0xff,0x05,0xa4,0x01] + +v_med3_num_f32 v5, s1, v255, exec_hi +// GFX12: encoding: [0x05,0x00,0x31,0xd6,0x01,0xfe,0xff,0x01] + +v_med3_num_f32 v5, s105, s105, exec_lo +// GFX12: encoding: [0x05,0x00,0x31,0xd6,0x69,0xd2,0xf8,0x01] + +v_med3_num_f32 v5, vcc_lo, ttmp15, v3 +// GFX12: encoding: [0x05,0x00,0x31,0xd6,0x6a,0xf6,0x0c,0x04] + +v_med3_num_f32 v5, vcc_hi, 0xaf123456, v255 +// GFX12: encoding: [0x05,0x00,0x31,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] + +v_med3_num_f32 v5, -|ttmp15|, -|src_scc|, -|ttmp15| +// GFX12: encoding: [0x05,0x07,0x31,0xd6,0x7b,0xfa,0xed,0xe1] + +v_med3_num_f32 v5, m0, 0.5, m0 +// GFX12: encoding: [0x05,0x00,0x31,0xd6,0x7d,0xe0,0xf5,0x01] + +v_med3_num_f32 v5, |exec_lo|, -1, vcc_hi +// GFX12: encoding: [0x05,0x01,0x31,0xd6,0x7e,0x82,0xad,0x01] + +v_med3_num_f32 v5, -|exec_hi|, null, -|vcc_lo| +// GFX12: encoding: [0x05,0x05,0x31,0xd6,0x7f,0xf8,0xa8,0xa1] + +v_med3_num_f32 v5, null, exec_lo, -|0xaf123456| +// GFX12: encoding: [0x05,0x04,0x31,0xd6,0x7c,0xfc,0xfc,0x83,0x56,0x34,0x12,0xaf] + +v_med3_num_f32 v5, -1, -|exec_hi|, -|src_scc| +// GFX12: encoding: [0x05,0x06,0x31,0xd6,0xc1,0xfe,0xf4,0xc3] + +v_med3_num_f32 v5, 0.5, -m0, 0.5 mul:2 +// GFX12: encoding: [0x05,0x00,0x31,0xd6,0xf0,0xfa,0xc0,0x4b] + +v_med3_num_f32 v5, -src_scc, |vcc_lo|, -1 mul:4 +// GFX12: encoding: [0x05,0x02,0x31,0xd6,0xfd,0xd4,0x04,0x33] + +v_med3_num_f32 v255, -|0xaf123456|, 
-|vcc_hi|, null clamp div:2 +// GFX12: encoding: [0xff,0x83,0x31,0xd6,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf] + +v_med3_i16 v5, v1, v2, s3 +// GFX12: encoding: [0x05,0x00,0x50,0xd6,0x01,0x05,0x0e,0x00] + +v_med3_i16 v5, v255, s2, s105 +// GFX12: encoding: [0x05,0x00,0x50,0xd6,0xff,0x05,0xa4,0x01] + +v_med3_i16 v5, s1, v255, exec_hi +// GFX12: encoding: [0x05,0x00,0x50,0xd6,0x01,0xfe,0xff,0x01] + +v_med3_i16 v5, s105, s105, exec_lo +// GFX12: encoding: [0x05,0x00,0x50,0xd6,0x69,0xd2,0xf8,0x01] + +v_med3_i16 v5, vcc_lo, ttmp15, v3 +// GFX12: encoding: [0x05,0x00,0x50,0xd6,0x6a,0xf6,0x0c,0x04] + +v_med3_i16 v5, vcc_hi, 0xfe0b, v255 +// GFX12: encoding: [0x05,0x00,0x50,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] + +v_med3_i16 v5, ttmp15, src_scc, ttmp15 +// GFX12: encoding: [0x05,0x00,0x50,0xd6,0x7b,0xfa,0xed,0x01] + +v_med3_i16 v5, m0, 0.5, m0 +// GFX12: encoding: [0x05,0x00,0x50,0xd6,0x7d,0xe0,0xf5,0x01] + +v_med3_i16 v5, exec_lo, -1, vcc_hi +// GFX12: encoding: [0x05,0x00,0x50,0xd6,0x7e,0x82,0xad,0x01] + +v_med3_i16 v5, exec_hi, null, vcc_lo op_sel:[1,1,1,1] +// GFX12: encoding: [0x05,0x78,0x50,0xd6,0x7f,0xf8,0xa8,0x01] + +v_med3_i16 v5, null, exec_lo, 0xfe0b op_sel:[0,0,0,0] +// GFX12: encoding: [0x05,0x00,0x50,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00] + +v_med3_i16 v5, -1, exec_hi, src_scc op_sel:[1,0,0,0] +// GFX12: encoding: [0x05,0x08,0x50,0xd6,0xc1,0xfe,0xf4,0x03] + +v_med3_i16 v5, 0.5, m0, 0.5 op_sel:[0,1,0,0] +// GFX12: encoding: [0x05,0x10,0x50,0xd6,0xf0,0xfa,0xc0,0x03] + +v_med3_i16 v5, src_scc, vcc_lo, -1 op_sel:[0,0,1,0] +// GFX12: encoding: [0x05,0x20,0x50,0xd6,0xfd,0xd4,0x04,0x03] + +v_med3_i16 v255, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] +// GFX12: encoding: [0xff,0x40,0x50,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] + +v_med3_i32 v5, v1, v2, s3 +// GFX12: encoding: [0x05,0x00,0x20,0xd6,0x01,0x05,0x0e,0x00] + +v_med3_i32 v5, v255, s2, s105 +// GFX12: encoding: [0x05,0x00,0x20,0xd6,0xff,0x05,0xa4,0x01] + +v_med3_i32 v5, s1, v255, exec_hi +// GFX12: encoding: [0x05,0x00,0x20,0xd6,0x01,0xfe,0xff,0x01] + +v_med3_i32 v5, s105, s105, exec_lo +// GFX12: encoding: [0x05,0x00,0x20,0xd6,0x69,0xd2,0xf8,0x01] + +v_med3_i32 v5, vcc_lo, ttmp15, v3 +// GFX12: encoding: [0x05,0x00,0x20,0xd6,0x6a,0xf6,0x0c,0x04] + +v_med3_i32 v5, vcc_hi, 0xaf123456, v255 +// GFX12: encoding: [0x05,0x00,0x20,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] + +v_med3_i32 v5, ttmp15, src_scc, ttmp15 +// GFX12: encoding: [0x05,0x00,0x20,0xd6,0x7b,0xfa,0xed,0x01] + +v_med3_i32 v5, m0, 0.5, m0 +// GFX12: encoding: [0x05,0x00,0x20,0xd6,0x7d,0xe0,0xf5,0x01] + +v_med3_i32 v5, exec_lo, -1, vcc_hi +// GFX12: encoding: [0x05,0x00,0x20,0xd6,0x7e,0x82,0xad,0x01] + +v_med3_i32 v5, exec_hi, null, vcc_lo +// GFX12: encoding: [0x05,0x00,0x20,0xd6,0x7f,0xf8,0xa8,0x01] + +v_med3_i32 v5, null, exec_lo, 0xaf123456 +// GFX12: encoding: [0x05,0x00,0x20,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf] + +v_med3_i32 v5, -1, exec_hi, src_scc +// GFX12: encoding: [0x05,0x00,0x20,0xd6,0xc1,0xfe,0xf4,0x03] + +v_med3_i32 v5, 0.5, m0, 0.5 +// GFX12: encoding: [0x05,0x00,0x20,0xd6,0xf0,0xfa,0xc0,0x03] + +v_med3_i32 v5, src_scc, vcc_lo, -1 +// GFX12: encoding: [0x05,0x00,0x20,0xd6,0xfd,0xd4,0x04,0x03] + +v_med3_i32 v255, 0xaf123456, vcc_hi, null +// GFX12: encoding: [0xff,0x00,0x20,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] + +v_med3_u16 v5, v1, v2, s3 +// GFX12: encoding: [0x05,0x00,0x51,0xd6,0x01,0x05,0x0e,0x00] + +v_med3_u16 v5, v255, s2, s105 +// GFX12: encoding: [0x05,0x00,0x51,0xd6,0xff,0x05,0xa4,0x01] + +v_med3_u16 v5, s1, v255, 
exec_hi +// GFX12: encoding: [0x05,0x00,0x51,0xd6,0x01,0xfe,0xff,0x01] + +v_med3_u16 v5, s105, s105, exec_lo +// GFX12: encoding: [0x05,0x00,0x51,0xd6,0x69,0xd2,0xf8,0x01] + +v_med3_u16 v5, vcc_lo, ttmp15, v3 +// GFX12: encoding: [0x05,0x00,0x51,0xd6,0x6a,0xf6,0x0c,0x04] + +v_med3_u16 v5, vcc_hi, 0xfe0b, v255 +// GFX12: encoding: [0x05,0x00,0x51,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] + +v_med3_u16 v5, ttmp15, src_scc, ttmp15 +// GFX12: encoding: [0x05,0x00,0x51,0xd6,0x7b,0xfa,0xed,0x01] + +v_med3_u16 v5, m0, 0.5, m0 +// GFX12: encoding: [0x05,0x00,0x51,0xd6,0x7d,0xe0,0xf5,0x01] + +v_med3_u16 v5, exec_lo, -1, vcc_hi +// GFX12: encoding: [0x05,0x00,0x51,0xd6,0x7e,0x82,0xad,0x01] + +v_med3_u16 v5, exec_hi, null, vcc_lo op_sel:[1,1,1,1] +// GFX12: encoding: [0x05,0x78,0x51,0xd6,0x7f,0xf8,0xa8,0x01] + +v_med3_u16 v5, null, exec_lo, 0xfe0b op_sel:[0,0,0,0] +// GFX12: encoding: [0x05,0x00,0x51,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00] + +v_med3_u16 v5, -1, exec_hi, src_scc op_sel:[1,0,0,0] +// GFX12: encoding: [0x05,0x08,0x51,0xd6,0xc1,0xfe,0xf4,0x03] + +v_med3_u16 v5, 0.5, m0, 0.5 op_sel:[0,1,0,0] +// GFX12: encoding: [0x05,0x10,0x51,0xd6,0xf0,0xfa,0xc0,0x03] + +v_med3_u16 v5, src_scc, vcc_lo, -1 op_sel:[0,0,1,0] +// GFX12: encoding: [0x05,0x20,0x51,0xd6,0xfd,0xd4,0x04,0x03] + +v_med3_u16 v255, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1] +// GFX12: encoding: [0xff,0x40,0x51,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] + +v_med3_u32 v5, v1, v2, s3 +// GFX12: encoding: [0x05,0x00,0x21,0xd6,0x01,0x05,0x0e,0x00] + +v_med3_u32 v5, v255, s2, s105 +// GFX12: encoding: [0x05,0x00,0x21,0xd6,0xff,0x05,0xa4,0x01] + +v_med3_u32 v5, s1, v255, exec_hi +// GFX12: encoding: [0x05,0x00,0x21,0xd6,0x01,0xfe,0xff,0x01] + +v_med3_u32 v5, s105, s105, exec_lo +// GFX12: encoding: [0x05,0x00,0x21,0xd6,0x69,0xd2,0xf8,0x01] + +v_med3_u32 v5, vcc_lo, ttmp15, v3 +// GFX12: encoding: [0x05,0x00,0x21,0xd6,0x6a,0xf6,0x0c,0x04] + +v_med3_u32 v5, vcc_hi, 0xaf123456, v255 +// GFX12: encoding: [0x05,0x00,0x21,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] + +v_med3_u32 v5, ttmp15, src_scc, ttmp15 +// GFX12: encoding: [0x05,0x00,0x21,0xd6,0x7b,0xfa,0xed,0x01] + +v_med3_u32 v5, m0, 0.5, m0 +// GFX12: encoding: [0x05,0x00,0x21,0xd6,0x7d,0xe0,0xf5,0x01] + +v_med3_u32 v5, exec_lo, -1, vcc_hi +// GFX12: encoding: [0x05,0x00,0x21,0xd6,0x7e,0x82,0xad,0x01] + +v_med3_u32 v5, exec_hi, null, vcc_lo +// GFX12: encoding: [0x05,0x00,0x21,0xd6,0x7f,0xf8,0xa8,0x01] + +v_med3_u32 v5, null, exec_lo, 0xaf123456 +// GFX12: encoding: [0x05,0x00,0x21,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf] + +v_med3_u32 v5, -1, exec_hi, src_scc +// GFX12: encoding: [0x05,0x00,0x21,0xd6,0xc1,0xfe,0xf4,0x03] + +v_med3_u32 v5, 0.5, m0, 0.5 +// GFX12: encoding: [0x05,0x00,0x21,0xd6,0xf0,0xfa,0xc0,0x03] + +v_med3_u32 v5, src_scc, vcc_lo, -1 +// GFX12: encoding: [0x05,0x00,0x21,0xd6,0xfd,0xd4,0x04,0x03] + +v_med3_u32 v255, 0xaf123456, vcc_hi, null +// GFX12: encoding: [0xff,0x00,0x21,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] + +v_min3_num_f16 v5, v1, v2, s3 +// GFX12: encoding: [0x05,0x00,0x2b,0xd6,0x01,0x05,0x0e,0x00] + +v_min3_num_f16 v5, v255, s2, s105 +// GFX12: encoding: [0x05,0x00,0x2b,0xd6,0xff,0x05,0xa4,0x01] + +v_min3_num_f16 v5, s1, v255, exec_hi +// GFX12: encoding: [0x05,0x00,0x2b,0xd6,0x01,0xfe,0xff,0x01] + +v_min3_num_f16 v5, s105, s105, exec_lo +// GFX12: encoding: [0x05,0x00,0x2b,0xd6,0x69,0xd2,0xf8,0x01] + +v_min3_num_f16 v5, vcc_lo, ttmp15, v3 +// GFX12: encoding: [0x05,0x00,0x2b,0xd6,0x6a,0xf6,0x0c,0x04] + +v_min3_num_f16 v5, vcc_hi, 
0xfe0b, v255
+// GFX12: encoding: [0x05,0x00,0x2b,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00]
+
+v_min3_num_f16 v5, -|ttmp15|, -|src_scc|, -|ttmp15|
+// GFX12: encoding: [0x05,0x07,0x2b,0xd6,0x7b,0xfa,0xed,0xe1]
+
+v_min3_num_f16 v5, m0, 0.5, m0
+// GFX12: encoding: [0x05,0x00,0x2b,0xd6,0x7d,0xe0,0xf5,0x01]
+
+v_min3_num_f16 v5, |exec_lo|, -1, vcc_hi
+// GFX12: encoding: [0x05,0x01,0x2b,0xd6,0x7e,0x82,0xad,0x01]
+
+v_min3_num_f16 v5, -|exec_hi|, null, -|vcc_lo| op_sel:[1,1,1,1]
+// GFX12: encoding: [0x05,0x7d,0x2b,0xd6,0x7f,0xf8,0xa8,0xa1]
+
+v_min3_num_f16 v5, null, exec_lo, -|0xfe0b| op_sel:[0,0,0,0]
+// GFX12: encoding: [0x05,0x04,0x2b,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00]
+
+v_min3_num_f16 v5, -1, -|exec_hi|, -|src_scc| op_sel:[1,0,0,0]
+// GFX12: encoding: [0x05,0x0e,0x2b,0xd6,0xc1,0xfe,0xf4,0xc3]
+
+v_min3_num_f16 v5, 0.5, -m0, 0.5 op_sel:[0,1,0,0]
+// GFX12: encoding: [0x05,0x10,0x2b,0xd6,0xf0,0xfa,0xc0,0x43]
+
+v_min3_num_f16 v5, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0]
+// GFX12: encoding: [0x05,0x22,0x2b,0xd6,0xfd,0xd4,0x04,0x23]
+
+v_min3_num_f16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp
+// GFX12: encoding: [0xff,0xc3,0x2b,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00]
+
+v_min3_num_f32 v5, v1, v2, s3
+// GFX12: encoding: [0x05,0x00,0x29,0xd6,0x01,0x05,0x0e,0x00]
+
+v_min3_num_f32 v5, v255, s2, s105
+// GFX12: encoding: [0x05,0x00,0x29,0xd6,0xff,0x05,0xa4,0x01]
+
+v_min3_num_f32 v5, s1, v255, exec_hi
+// GFX12: encoding: [0x05,0x00,0x29,0xd6,0x01,0xfe,0xff,0x01]
+
+v_min3_num_f32 v5, s105, s105, exec_lo
+// GFX12: encoding: [0x05,0x00,0x29,0xd6,0x69,0xd2,0xf8,0x01]
+
+v_min3_num_f32 v5, vcc_lo, ttmp15, v3
+// GFX12: encoding: [0x05,0x00,0x29,0xd6,0x6a,0xf6,0x0c,0x04]
+
+v_min3_num_f32 v5, vcc_hi, 0xaf123456, v255
+// GFX12: encoding: [0x05,0x00,0x29,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf]
+
+v_min3_num_f32 v5, -|ttmp15|, -|src_scc|, -|ttmp15|
+// GFX12: encoding: [0x05,0x07,0x29,0xd6,0x7b,0xfa,0xed,0xe1]
+
+v_min3_num_f32 v5, m0, 0.5, m0
+// GFX12: encoding: [0x05,0x00,0x29,0xd6,0x7d,0xe0,0xf5,0x01]
+
+v_min3_num_f32 v5, |exec_lo|, -1, vcc_hi
+// GFX12: encoding: [0x05,0x01,0x29,0xd6,0x7e,0x82,0xad,0x01]
+
+v_min3_num_f32 v5, -|exec_hi|, null, -|vcc_lo|
+// GFX12: encoding: [0x05,0x05,0x29,0xd6,0x7f,0xf8,0xa8,0xa1]
+
+v_min3_num_f32 v5, null, exec_lo, -|0xaf123456|
+// GFX12: encoding: [0x05,0x04,0x29,0xd6,0x7c,0xfc,0xfc,0x83,0x56,0x34,0x12,0xaf]
+
+v_min3_num_f32 v5, -1, -|exec_hi|, -|src_scc|
+// GFX12: encoding: [0x05,0x06,0x29,0xd6,0xc1,0xfe,0xf4,0xc3]
+
+v_min3_num_f32 v5, 0.5, -m0, 0.5 mul:2
+// GFX12: encoding: [0x05,0x00,0x29,0xd6,0xf0,0xfa,0xc0,0x4b]
+
+v_min3_num_f32 v5, -src_scc, |vcc_lo|, -1 mul:4
+// GFX12: encoding: [0x05,0x02,0x29,0xd6,0xfd,0xd4,0x04,0x33]
+
+v_min3_num_f32 v255, -|0xaf123456|, -|vcc_hi|, null clamp div:2
+// GFX12: encoding: [0xff,0x83,0x29,0xd6,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf]
+
+v_min3_i16 v5, v1, v2, s3
+// GFX12: encoding: [0x05,0x00,0x4a,0xd6,0x01,0x05,0x0e,0x00]
+
+v_min3_i16 v5, v255, s2, s105
+// GFX12: encoding: [0x05,0x00,0x4a,0xd6,0xff,0x05,0xa4,0x01]
+
+v_min3_i16 v5, s1, v255, exec_hi
+// GFX12: encoding: [0x05,0x00,0x4a,0xd6,0x01,0xfe,0xff,0x01]
+
+v_min3_i16 v5, s105, s105, exec_lo
+// GFX12: encoding: [0x05,0x00,0x4a,0xd6,0x69,0xd2,0xf8,0x01]
+
+v_min3_i16 v5, vcc_lo, ttmp15, v3
+// GFX12: encoding: [0x05,0x00,0x4a,0xd6,0x6a,0xf6,0x0c,0x04]
+
+v_min3_i16 v5, vcc_hi, 0xfe0b, v255
+// GFX12: encoding: [0x05,0x00,0x4a,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00]
+
+v_min3_i16 v5, ttmp15, src_scc, ttmp15
+// GFX12: encoding: [0x05,0x00,0x4a,0xd6,0x7b,0xfa,0xed,0x01]
+
+v_min3_i16 v5, m0, 0.5, m0
+// GFX12: encoding: [0x05,0x00,0x4a,0xd6,0x7d,0xe0,0xf5,0x01]
+
+v_min3_i16 v5, exec_lo, -1, vcc_hi
+// GFX12: encoding: [0x05,0x00,0x4a,0xd6,0x7e,0x82,0xad,0x01]
+
+v_min3_i16 v5, exec_hi, null, vcc_lo op_sel:[1,1,1,1]
+// GFX12: encoding: [0x05,0x78,0x4a,0xd6,0x7f,0xf8,0xa8,0x01]
+
+v_min3_i16 v5, null, exec_lo, 0xfe0b op_sel:[0,0,0,0]
+// GFX12: encoding: [0x05,0x00,0x4a,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00]
+
+v_min3_i16 v5, -1, exec_hi, src_scc op_sel:[1,0,0,0]
+// GFX12: encoding: [0x05,0x08,0x4a,0xd6,0xc1,0xfe,0xf4,0x03]
+
+v_min3_i16 v5, 0.5, m0, 0.5 op_sel:[0,1,0,0]
+// GFX12: encoding: [0x05,0x10,0x4a,0xd6,0xf0,0xfa,0xc0,0x03]
+
+v_min3_i16 v5, src_scc, vcc_lo, -1 op_sel:[0,0,1,0]
+// GFX12: encoding: [0x05,0x20,0x4a,0xd6,0xfd,0xd4,0x04,0x03]
+
+v_min3_i16 v255, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1]
+// GFX12: encoding: [0xff,0x40,0x4a,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00]
+
+v_min3_i32 v5, v1, v2, s3
+// GFX12: encoding: [0x05,0x00,0x1a,0xd6,0x01,0x05,0x0e,0x00]
+
+v_min3_i32 v5, v255, s2, s105
+// GFX12: encoding: [0x05,0x00,0x1a,0xd6,0xff,0x05,0xa4,0x01]
+
+v_min3_i32 v5, s1, v255, exec_hi
+// GFX12: encoding: [0x05,0x00,0x1a,0xd6,0x01,0xfe,0xff,0x01]
+
+v_min3_i32 v5, s105, s105, exec_lo
+// GFX12: encoding: [0x05,0x00,0x1a,0xd6,0x69,0xd2,0xf8,0x01]
+
+v_min3_i32 v5, vcc_lo, ttmp15, v3
+// GFX12: encoding: [0x05,0x00,0x1a,0xd6,0x6a,0xf6,0x0c,0x04]
+
+v_min3_i32 v5, vcc_hi, 0xaf123456, v255
+// GFX12: encoding: [0x05,0x00,0x1a,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf]
+
+v_min3_i32 v5, ttmp15, src_scc, ttmp15
+// GFX12: encoding: [0x05,0x00,0x1a,0xd6,0x7b,0xfa,0xed,0x01]
+
+v_min3_i32 v5, m0, 0.5, m0
+// GFX12: encoding: [0x05,0x00,0x1a,0xd6,0x7d,0xe0,0xf5,0x01]
+
+v_min3_i32 v5, exec_lo, -1, vcc_hi
+// GFX12: encoding: [0x05,0x00,0x1a,0xd6,0x7e,0x82,0xad,0x01]
+
+v_min3_i32 v5, exec_hi, null, vcc_lo
+// GFX12: encoding: [0x05,0x00,0x1a,0xd6,0x7f,0xf8,0xa8,0x01]
+
+v_min3_i32 v5, null, exec_lo, 0xaf123456
+// GFX12: encoding: [0x05,0x00,0x1a,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf]
+
+v_min3_i32 v5, -1, exec_hi, src_scc
+// GFX12: encoding: [0x05,0x00,0x1a,0xd6,0xc1,0xfe,0xf4,0x03]
+
+v_min3_i32 v5, 0.5, m0, 0.5
+// GFX12: encoding: [0x05,0x00,0x1a,0xd6,0xf0,0xfa,0xc0,0x03]
+
+v_min3_i32 v5, src_scc, vcc_lo, -1
+// GFX12: encoding: [0x05,0x00,0x1a,0xd6,0xfd,0xd4,0x04,0x03]
+
+v_min3_i32 v255, 0xaf123456, vcc_hi, null
+// GFX12: encoding: [0xff,0x00,0x1a,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf]
+
+v_min3_u16 v5, v1, v2, s3
+// GFX12: encoding: [0x05,0x00,0x4b,0xd6,0x01,0x05,0x0e,0x00]
+
+v_min3_u16 v5, v255, s2, s105
+// GFX12: encoding: [0x05,0x00,0x4b,0xd6,0xff,0x05,0xa4,0x01]
+
+v_min3_u16 v5, s1, v255, exec_hi
+// GFX12: encoding: [0x05,0x00,0x4b,0xd6,0x01,0xfe,0xff,0x01]
+
+v_min3_u16 v5, s105, s105, exec_lo
+// GFX12: encoding: [0x05,0x00,0x4b,0xd6,0x69,0xd2,0xf8,0x01]
+
+v_min3_u16 v5, vcc_lo, ttmp15, v3
+// GFX12: encoding: [0x05,0x00,0x4b,0xd6,0x6a,0xf6,0x0c,0x04]
+
+v_min3_u16 v5, vcc_hi, 0xfe0b, v255
+// GFX12: encoding: [0x05,0x00,0x4b,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00]
+
+v_min3_u16 v5, ttmp15, src_scc, ttmp15
+// GFX12: encoding: [0x05,0x00,0x4b,0xd6,0x7b,0xfa,0xed,0x01]
+
+v_min3_u16 v5, m0, 0.5, m0
+// GFX12: encoding: [0x05,0x00,0x4b,0xd6,0x7d,0xe0,0xf5,0x01]
+
+v_min3_u16 v5, exec_lo, -1, vcc_hi
+// GFX12: encoding: [0x05,0x00,0x4b,0xd6,0x7e,0x82,0xad,0x01]
+
+v_min3_u16 v5, exec_hi, null, vcc_lo op_sel:[1,1,1,1]
+// GFX12: encoding: [0x05,0x78,0x4b,0xd6,0x7f,0xf8,0xa8,0x01]
+
+v_min3_u16 v5, null, exec_lo, 0xfe0b op_sel:[0,0,0,0]
+// GFX12: encoding: [0x05,0x00,0x4b,0xd6,0x7c,0xfc,0xfc,0x03,0x0b,0xfe,0x00,0x00]
+
+v_min3_u16 v5, -1, exec_hi, src_scc op_sel:[1,0,0,0]
+// GFX12: encoding: [0x05,0x08,0x4b,0xd6,0xc1,0xfe,0xf4,0x03]
+
+v_min3_u16 v5, 0.5, m0, 0.5 op_sel:[0,1,0,0]
+// GFX12: encoding: [0x05,0x10,0x4b,0xd6,0xf0,0xfa,0xc0,0x03]
+
+v_min3_u16 v5, src_scc, vcc_lo, -1 op_sel:[0,0,1,0]
+// GFX12: encoding: [0x05,0x20,0x4b,0xd6,0xfd,0xd4,0x04,0x03]
+
+v_min3_u16 v255, 0xfe0b, vcc_hi, null op_sel:[0,0,0,1]
+// GFX12: encoding: [0xff,0x40,0x4b,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00]
+
+v_min3_u32 v5, v1, v2, s3
+// GFX12: encoding: [0x05,0x00,0x1b,0xd6,0x01,0x05,0x0e,0x00]
+
+v_min3_u32 v5, v255, s2, s105
+// GFX12: encoding: [0x05,0x00,0x1b,0xd6,0xff,0x05,0xa4,0x01]
+
+v_min3_u32 v5, s1, v255, exec_hi
+// GFX12: encoding: [0x05,0x00,0x1b,0xd6,0x01,0xfe,0xff,0x01]
+
+v_min3_u32 v5, s105, s105, exec_lo
+// GFX12: encoding: [0x05,0x00,0x1b,0xd6,0x69,0xd2,0xf8,0x01]
+
+v_min3_u32 v5, vcc_lo, ttmp15, v3
+// GFX12: encoding: [0x05,0x00,0x1b,0xd6,0x6a,0xf6,0x0c,0x04]
+
+v_min3_u32 v5, vcc_hi, 0xaf123456, v255
+// GFX12: encoding: [0x05,0x00,0x1b,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf]
+
+v_min3_u32 v5, ttmp15, src_scc, ttmp15
+// GFX12: encoding: [0x05,0x00,0x1b,0xd6,0x7b,0xfa,0xed,0x01]
+
+v_min3_u32 v5, m0, 0.5, m0
+// GFX12: encoding: [0x05,0x00,0x1b,0xd6,0x7d,0xe0,0xf5,0x01]
+
+v_min3_u32 v5, exec_lo, -1, vcc_hi
+// GFX12: encoding: [0x05,0x00,0x1b,0xd6,0x7e,0x82,0xad,0x01]
+
+v_min3_u32 v5, exec_hi, null, vcc_lo
+// GFX12: encoding: [0x05,0x00,0x1b,0xd6,0x7f,0xf8,0xa8,0x01]
+
+v_min3_u32 v5, null, exec_lo, 0xaf123456
+// GFX12: encoding: [0x05,0x00,0x1b,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf]
+
+v_min3_u32 v5, -1, exec_hi, src_scc
+// GFX12: encoding: [0x05,0x00,0x1b,0xd6,0xc1,0xfe,0xf4,0x03]
+
+v_min3_u32 v5, 0.5, m0, 0.5
+// GFX12: encoding: [0x05,0x00,0x1b,0xd6,0xf0,0xfa,0xc0,0x03]
+
+v_min3_u32 v5, src_scc, vcc_lo, -1
+// GFX12: encoding: [0x05,0x00,0x1b,0xd6,0xfd,0xd4,0x04,0x03]
+
+v_min3_u32 v255, 0xaf123456, vcc_hi, null
+// GFX12: encoding: [0xff,0x00,0x1b,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf]
+
+v_min_i16 v5, v1, v2
+// GFX12: encoding: [0x05,0x00,0x0c,0xd7,0x01,0x05,0x02,0x00]
+
+v_min_i16 v5, v255, v255
+// GFX12: encoding: [0x05,0x00,0x0c,0xd7,0xff,0xff,0x03,0x00]
+
+v_min_i16 v5, s1, s2
+// GFX12: encoding: [0x05,0x00,0x0c,0xd7,0x01,0x04,0x00,0x00]
+
+v_min_i16 v5, s105, s105
+// GFX12: encoding: [0x05,0x00,0x0c,0xd7,0x69,0xd2,0x00,0x00]
+
+v_min_i16 v5, vcc_lo, ttmp15
+// GFX12: encoding: [0x05,0x00,0x0c,0xd7,0x6a,0xf6,0x00,0x00]
+
+v_min_i16 v5, vcc_hi, 0xfe0b
+// GFX12: encoding: [0x05,0x00,0x0c,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+
+v_min_i16 v5, ttmp15, src_scc
+// GFX12: encoding: [0x05,0x00,0x0c,0xd7,0x7b,0xfa,0x01,0x00]
+
+v_min_i16 v5, m0, 0.5
+// GFX12: encoding: [0x05,0x00,0x0c,0xd7,0x7d,0xe0,0x01,0x00]
+
+v_min_i16 v5, exec_lo, -1
+// GFX12: encoding: [0x05,0x00,0x0c,0xd7,0x7e,0x82,0x01,0x00]
+
+v_min_i16 v5, exec_hi, null
+// GFX12: encoding: [0x05,0x00,0x0c,0xd7,0x7f,0xf8,0x00,0x00]
+
+v_min_i16 v5, null, exec_lo
+// GFX12: encoding: [0x05,0x00,0x0c,0xd7,0x7c,0xfc,0x00,0x00]
+
+v_min_i16 v5, -1, exec_hi
+// GFX12: encoding: [0x05,0x00,0x0c,0xd7,0xc1,0xfe,0x00,0x00]
+
+v_min_i16 v5, 0.5, m0
+// GFX12: encoding: [0x05,0x00,0x0c,0xd7,0xf0,0xfa,0x00,0x00]
+
+v_min_i16 v5, src_scc, vcc_lo
+// GFX12: encoding: [0x05,0x00,0x0c,0xd7,0xfd,0xd4,0x00,0x00]
+
+v_min_i16 v255, 0xfe0b, vcc_hi
+// GFX12: encoding: [0xff,0x00,0x0c,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+
+v_min_u16 v5, v1, v2
+// GFX12: encoding: [0x05,0x00,0x0b,0xd7,0x01,0x05,0x02,0x00]
+
+v_min_u16 v5, v255, v255
+// GFX12: encoding: [0x05,0x00,0x0b,0xd7,0xff,0xff,0x03,0x00]
+
+v_min_u16 v5, s1, s2
+// GFX12: encoding: [0x05,0x00,0x0b,0xd7,0x01,0x04,0x00,0x00]
+
+v_min_u16 v5, s105, s105
+// GFX12: encoding: [0x05,0x00,0x0b,0xd7,0x69,0xd2,0x00,0x00]
+
+v_min_u16 v5, vcc_lo, ttmp15
+// GFX12: encoding: [0x05,0x00,0x0b,0xd7,0x6a,0xf6,0x00,0x00]
+
+v_min_u16 v5, vcc_hi, 0xfe0b
+// GFX12: encoding: [0x05,0x00,0x0b,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+
+v_min_u16 v5, ttmp15, src_scc
+// GFX12: encoding: [0x05,0x00,0x0b,0xd7,0x7b,0xfa,0x01,0x00]
+
+v_min_u16 v5, m0, 0.5
+// GFX12: encoding: [0x05,0x00,0x0b,0xd7,0x7d,0xe0,0x01,0x00]
+
+v_min_u16 v5, exec_lo, -1
+// GFX12: encoding: [0x05,0x00,0x0b,0xd7,0x7e,0x82,0x01,0x00]
+
+v_min_u16 v5, exec_hi, null
+// GFX12: encoding: [0x05,0x00,0x0b,0xd7,0x7f,0xf8,0x00,0x00]
+
+v_min_u16 v5, null, exec_lo
+// GFX12: encoding: [0x05,0x00,0x0b,0xd7,0x7c,0xfc,0x00,0x00]
+
+v_min_u16 v5, -1, exec_hi
+// GFX12: encoding: [0x05,0x00,0x0b,0xd7,0xc1,0xfe,0x00,0x00]
+
+v_min_u16 v5, 0.5, m0
+// GFX12: encoding: [0x05,0x00,0x0b,0xd7,0xf0,0xfa,0x00,0x00]
+
+v_min_u16 v5, src_scc, vcc_lo
+// GFX12: encoding: [0x05,0x00,0x0b,0xd7,0xfd,0xd4,0x00,0x00]
+
+v_min_u16 v255, 0xfe0b, vcc_hi
+// GFX12: encoding: [0xff,0x00,0x0b,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+
+v_minmax_num_f16 v5, v1, v2, s3
+// GFX12: encoding: [0x05,0x00,0x6a,0xd6,0x01,0x05,0x0e,0x00]
+
+v_minmax_num_f16 v5, v255, s2, s105
+// GFX12: encoding: [0x05,0x00,0x6a,0xd6,0xff,0x05,0xa4,0x01]
+
+v_minmax_num_f16 v5, s1, v255, exec_hi
+// GFX12: encoding: [0x05,0x00,0x6a,0xd6,0x01,0xfe,0xff,0x01]
+
+v_minmax_num_f16 v5, s105, s105, exec_lo
+// GFX12: encoding: [0x05,0x00,0x6a,0xd6,0x69,0xd2,0xf8,0x01]
+
+v_minmax_num_f16 v5, vcc_lo, ttmp15, v3
+// GFX12: encoding: [0x05,0x00,0x6a,0xd6,0x6a,0xf6,0x0c,0x04]
+
+v_minmax_num_f16 v5, vcc_hi, 0xfe0b, v255
+// GFX12: encoding: [0x05,0x00,0x6a,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00]
+
+v_minmax_num_f16 v5, -|ttmp15|, -|src_scc|, -|ttmp15|
+// GFX12: encoding: [0x05,0x07,0x6a,0xd6,0x7b,0xfa,0xed,0xe1]
+
+v_minmax_num_f16 v5, m0, 0.5, m0
+// GFX12: encoding: [0x05,0x00,0x6a,0xd6,0x7d,0xe0,0xf5,0x01]
+
+v_minmax_num_f16 v5, |exec_lo|, -1, vcc_hi
+// GFX12: encoding: [0x05,0x01,0x6a,0xd6,0x7e,0x82,0xad,0x01]
+
+v_minmax_num_f16 v5, -|exec_hi|, null, -|vcc_lo|
+// GFX12: encoding: [0x05,0x05,0x6a,0xd6,0x7f,0xf8,0xa8,0xa1]
+
+v_minmax_num_f16 v5, null, exec_lo, -|0xfe0b|
+// GFX12: encoding: [0x05,0x04,0x6a,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00]
+
+v_minmax_num_f16 v5, -1, -|exec_hi|, -|src_scc|
+// GFX12: encoding: [0x05,0x06,0x6a,0xd6,0xc1,0xfe,0xf4,0xc3]
+
+v_minmax_num_f16 v5, 0.5, -m0, 0.5 mul:2
+// GFX12: encoding: [0x05,0x00,0x6a,0xd6,0xf0,0xfa,0xc0,0x4b]
+
+v_minmax_num_f16 v5, -src_scc, |vcc_lo|, -1 mul:4
+// GFX12: encoding: [0x05,0x02,0x6a,0xd6,0xfd,0xd4,0x04,0x33]
+
+v_minmax_num_f16 v255, -|0xfe0b|, -|vcc_hi|, null clamp div:2
+// GFX12: encoding: [0xff,0x83,0x6a,0xd6,0xff,0xd6,0xf0,0x79,0x0b,0xfe,0x00,0x00]
+
+v_minmax_num_f32 v5, v1, v2, s3
+// GFX12: encoding: [0x05,0x00,0x68,0xd6,0x01,0x05,0x0e,0x00]
+
+v_minmax_num_f32 v5, v255, s2, s105
+// GFX12: encoding: [0x05,0x00,0x68,0xd6,0xff,0x05,0xa4,0x01]
+
+v_minmax_num_f32 v5, s1, v255, exec_hi
+// GFX12: encoding: [0x05,0x00,0x68,0xd6,0x01,0xfe,0xff,0x01]
+
+v_minmax_num_f32 v5, s105, s105, exec_lo
+// GFX12: encoding: [0x05,0x00,0x68,0xd6,0x69,0xd2,0xf8,0x01]
+
+v_minmax_num_f32 v5, vcc_lo, ttmp15, v3
+// GFX12: encoding: [0x05,0x00,0x68,0xd6,0x6a,0xf6,0x0c,0x04]
+
+v_minmax_num_f32 v5, vcc_hi, 0xaf123456, v255
+// GFX12: encoding: [0x05,0x00,0x68,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf]
+
+v_minmax_num_f32 v5, -|ttmp15|, -|src_scc|, -|ttmp15|
+// GFX12: encoding: [0x05,0x07,0x68,0xd6,0x7b,0xfa,0xed,0xe1]
+
+v_minmax_num_f32 v5, m0, 0.5, m0
+// GFX12: encoding: [0x05,0x00,0x68,0xd6,0x7d,0xe0,0xf5,0x01]
+
+v_minmax_num_f32 v5, |exec_lo|, -1, vcc_hi
+// GFX12: encoding: [0x05,0x01,0x68,0xd6,0x7e,0x82,0xad,0x01]
+
+v_minmax_num_f32 v5, -|exec_hi|, null, -|vcc_lo|
+// GFX12: encoding: [0x05,0x05,0x68,0xd6,0x7f,0xf8,0xa8,0xa1]
+
+v_minmax_num_f32 v5, null, exec_lo, -|0xaf123456|
+// GFX12: encoding: [0x05,0x04,0x68,0xd6,0x7c,0xfc,0xfc,0x83,0x56,0x34,0x12,0xaf]
+
+v_minmax_num_f32 v5, -1, -|exec_hi|, -|src_scc|
+// GFX12: encoding: [0x05,0x06,0x68,0xd6,0xc1,0xfe,0xf4,0xc3]
+
+v_minmax_num_f32 v5, 0.5, -m0, 0.5 mul:2
+// GFX12: encoding: [0x05,0x00,0x68,0xd6,0xf0,0xfa,0xc0,0x4b]
+
+v_minmax_num_f32 v5, -src_scc, |vcc_lo|, -1 mul:4
+// GFX12: encoding: [0x05,0x02,0x68,0xd6,0xfd,0xd4,0x04,0x33]
+
+v_minmax_num_f32 v255, -|0xaf123456|, -|vcc_hi|, null clamp div:2
+// GFX12: encoding: [0xff,0x83,0x68,0xd6,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf]
+
+v_minmax_i32 v5, v1, v2, s3
+// GFX12: encoding: [0x05,0x00,0x65,0xd6,0x01,0x05,0x0e,0x00]
+
+v_minmax_i32 v5, v255, s2, s105
+// GFX12: encoding: [0x05,0x00,0x65,0xd6,0xff,0x05,0xa4,0x01]
+
+v_minmax_i32 v5, s1, v255, exec_hi
+// GFX12: encoding: [0x05,0x00,0x65,0xd6,0x01,0xfe,0xff,0x01]
+
+v_minmax_i32 v5, s105, s105, exec_lo
+// GFX12: encoding: [0x05,0x00,0x65,0xd6,0x69,0xd2,0xf8,0x01]
+
+v_minmax_i32 v5, vcc_lo, ttmp15, v3
+// GFX12: encoding: [0x05,0x00,0x65,0xd6,0x6a,0xf6,0x0c,0x04]
+
+v_minmax_i32 v5, vcc_hi, 0xaf123456, v255
+// GFX12: encoding: [0x05,0x00,0x65,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf]
+
+v_minmax_i32 v5, ttmp15, src_scc, ttmp15
+// GFX12: encoding: [0x05,0x00,0x65,0xd6,0x7b,0xfa,0xed,0x01]
+
+v_minmax_i32 v5, m0, 0.5, m0
+// GFX12: encoding: [0x05,0x00,0x65,0xd6,0x7d,0xe0,0xf5,0x01]
+
+v_minmax_i32 v5, exec_lo, -1, vcc_hi
+// GFX12: encoding: [0x05,0x00,0x65,0xd6,0x7e,0x82,0xad,0x01]
+
+v_minmax_i32 v5, exec_hi, null, vcc_lo
+// GFX12: encoding: [0x05,0x00,0x65,0xd6,0x7f,0xf8,0xa8,0x01]
+
+v_minmax_i32 v5, null, exec_lo, 0xaf123456
+// GFX12: encoding: [0x05,0x00,0x65,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf]
+
+v_minmax_i32 v5, -1, exec_hi, src_scc
+// GFX12: encoding: [0x05,0x00,0x65,0xd6,0xc1,0xfe,0xf4,0x03]
+
+v_minmax_i32 v5, 0.5, m0, 0.5
+// GFX12: encoding: [0x05,0x00,0x65,0xd6,0xf0,0xfa,0xc0,0x03]
+
+v_minmax_i32 v5, src_scc, vcc_lo, -1
+// GFX12: encoding: [0x05,0x00,0x65,0xd6,0xfd,0xd4,0x04,0x03]
+
+v_minmax_i32 v255, 0xaf123456, vcc_hi, null
+// GFX12: encoding: [0xff,0x00,0x65,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf]
+
+v_minmax_u32 v5, v1, v2, s3
+// GFX12: encoding: [0x05,0x00,0x63,0xd6,0x01,0x05,0x0e,0x00]
+
+v_minmax_u32 v5, v255, s2, s105
+// GFX12: encoding: [0x05,0x00,0x63,0xd6,0xff,0x05,0xa4,0x01]
+
+v_minmax_u32 v5, s1, v255, exec_hi
+// GFX12: encoding: [0x05,0x00,0x63,0xd6,0x01,0xfe,0xff,0x01]
+
+v_minmax_u32 v5, s105, s105, exec_lo
+// GFX12: encoding: [0x05,0x00,0x63,0xd6,0x69,0xd2,0xf8,0x01]
+
+v_minmax_u32 v5, vcc_lo, ttmp15, v3
+// GFX12: encoding: [0x05,0x00,0x63,0xd6,0x6a,0xf6,0x0c,0x04]
+
+v_minmax_u32 v5, vcc_hi, 0xaf123456, v255
+// GFX12: encoding: [0x05,0x00,0x63,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf]
+
+v_minmax_u32 v5, ttmp15, src_scc, ttmp15
+// GFX12: encoding: [0x05,0x00,0x63,0xd6,0x7b,0xfa,0xed,0x01]
+
+v_minmax_u32 v5, m0, 0.5, m0
+// GFX12: encoding: [0x05,0x00,0x63,0xd6,0x7d,0xe0,0xf5,0x01]
+
+v_minmax_u32 v5, exec_lo, -1, vcc_hi
+// GFX12: encoding: [0x05,0x00,0x63,0xd6,0x7e,0x82,0xad,0x01]
+
+v_minmax_u32 v5, exec_hi, null, vcc_lo
+// GFX12: encoding: [0x05,0x00,0x63,0xd6,0x7f,0xf8,0xa8,0x01]
+
+v_minmax_u32 v5, null, exec_lo, 0xaf123456
+// GFX12: encoding: [0x05,0x00,0x63,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf]
+
+v_minmax_u32 v5, -1, exec_hi, src_scc
+// GFX12: encoding: [0x05,0x00,0x63,0xd6,0xc1,0xfe,0xf4,0x03]
+
+v_minmax_u32 v5, 0.5, m0, 0.5
+// GFX12: encoding: [0x05,0x00,0x63,0xd6,0xf0,0xfa,0xc0,0x03]
+
+v_minmax_u32 v5, src_scc, vcc_lo, -1
+// GFX12: encoding: [0x05,0x00,0x63,0xd6,0xfd,0xd4,0x04,0x03]
+
+v_minmax_u32 v255, 0xaf123456, vcc_hi, null
+// GFX12: encoding: [0xff,0x00,0x63,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf]
+
+v_mqsad_pk_u16_u8 v[5:6], v[1:2], v2, ttmp[14:15]
+// GFX12: encoding: [0x05,0x00,0x3b,0xd6,0x01,0x05,0xea,0x01]
+
+v_mqsad_pk_u16_u8 v[5:6], v[1:2], v255, ttmp[14:15]
+// GFX12: encoding: [0x05,0x00,0x3b,0xd6,0x01,0xff,0xeb,0x01]
+
+v_mqsad_pk_u16_u8 v[5:6], v[1:2], s2, ttmp[14:15]
+// GFX12: encoding: [0x05,0x00,0x3b,0xd6,0x01,0x05,0xe8,0x01]
+
+v_mqsad_pk_u16_u8 v[5:6], v[1:2], s105, ttmp[14:15]
+// GFX12: encoding: [0x05,0x00,0x3b,0xd6,0x01,0xd3,0xe8,0x01]
+
+v_mqsad_pk_u16_u8 v[5:6], v[254:255], ttmp15, s[6:7]
+// GFX12: encoding: [0x05,0x00,0x3b,0xd6,0xfe,0xf7,0x18,0x00]
+
+v_mqsad_pk_u16_u8 v[5:6], s[2:3], vcc_hi, v[3:4]
+// GFX12: encoding: [0x05,0x00,0x3b,0xd6,0x02,0xd6,0x0c,0x04]
+
+v_mqsad_pk_u16_u8 v[5:6], s[104:105], vcc_lo, s[104:105]
+// GFX12: encoding: [0x05,0x00,0x3b,0xd6,0x68,0xd4,0xa0,0x01]
+
+v_mqsad_pk_u16_u8 v[5:6], vcc, m0, v[254:255]
+// GFX12: encoding: [0x05,0x00,0x3b,0xd6,0x6a,0xfa,0xf8,0x07]
+
+v_mqsad_pk_u16_u8 v[5:6], ttmp[14:15], exec_hi, null
+// GFX12: encoding: [0x05,0x00,0x3b,0xd6,0x7a,0xfe,0xf0,0x01]
+
+v_mqsad_pk_u16_u8 v[5:6], exec, exec_lo, exec
+// GFX12: encoding: [0x05,0x00,0x3b,0xd6,0x7e,0xfc,0xf8,0x01]
+
+v_mqsad_pk_u16_u8 v[5:6], null, null, vcc
+// GFX12: encoding: [0x05,0x00,0x3b,0xd6,0x7c,0xf8,0xa8,0x01]
+
+v_mqsad_pk_u16_u8 v[5:6], -1, -1, 0xaf123456
+// GFX12: encoding: [0x05,0x00,0x3b,0xd6,0xc1,0x82,0xfd,0x03,0x56,0x34,0x12,0xaf]
+
+v_mqsad_pk_u16_u8 v[5:6], 0.5, 0.5, src_scc
+// GFX12: encoding: [0x05,0x00,0x3b,0xd6,0xf0,0xe0,0xf5,0x03]
+
+v_mqsad_pk_u16_u8 v[5:6], src_scc, src_scc, 0.5
+// GFX12: encoding: [0x05,0x00,0x3b,0xd6,0xfd,0xfa,0xc1,0x03]
+
+v_mqsad_pk_u16_u8 v[254:255], 0xaf123456, 0xaf123456, -1 clamp
+// GFX12: encoding: [0xfe,0x80,0x3b,0xd6,0xff,0xfe,0x05,0x03,0x56,0x34,0x12,0xaf]
+
+v_mqsad_u32_u8 v[5:8], v[1:2], v2, v[252:255]
+// GFX12: encoding: [0x05,0x00,0x3d,0xd6,0x01,0x05,0xf2,0x07]
+
+v_mqsad_u32_u8 v[5:8], v[1:2], v255, v[252:255]
+// GFX12: encoding: [0x05,0x00,0x3d,0xd6,0x01,0xff,0xf3,0x07]
+
+v_mqsad_u32_u8 v[5:8], v[1:2], s2, v[252:255]
+// GFX12: encoding: [0x05,0x00,0x3d,0xd6,0x01,0x05,0xf0,0x07]
+
+v_mqsad_u32_u8 v[5:8], v[1:2], s105, v[252:255]
+// GFX12: encoding: [0x05,0x00,0x3d,0xd6,0x01,0xd3,0xf0,0x07]
+
+v_mqsad_u32_u8 v[5:8], v[254:255], ttmp15, v[252:255]
+// GFX12: encoding: [0x05,0x00,0x3d,0xd6,0xfe,0xf7,0xf0,0x07]
+
+v_mqsad_u32_u8 v[5:8], s[2:3], vcc_hi, v[252:255]
+// GFX12: encoding: [0x05,0x00,0x3d,0xd6,0x02,0xd6,0xf0,0x07]
+
+v_mqsad_u32_u8 v[5:8], s[104:105], vcc_lo, v[252:255]
+// GFX12: encoding: [0x05,0x00,0x3d,0xd6,0x68,0xd4,0xf0,0x07]
+
+v_mqsad_u32_u8 v[5:8], vcc, m0, v[252:255]
+// GFX12: encoding: [0x05,0x00,0x3d,0xd6,0x6a,0xfa,0xf0,0x07]
+
+v_mqsad_u32_u8 v[5:8], ttmp[14:15], exec_hi, v[252:255]
+// GFX12: encoding: [0x05,0x00,0x3d,0xd6,0x7a,0xfe,0xf0,0x07]
+
+v_mqsad_u32_u8 v[5:8], exec, exec_lo, v[252:255]
+// GFX12: encoding: [0x05,0x00,0x3d,0xd6,0x7e,0xfc,0xf0,0x07]
+
+v_mqsad_u32_u8 v[5:8], null, null, v[252:255]
+// GFX12: encoding: [0x05,0x00,0x3d,0xd6,0x7c,0xf8,0xf0,0x07]
+
+v_mqsad_u32_u8 v[5:8], -1, -1, v[252:255]
+// GFX12: encoding: [0x05,0x00,0x3d,0xd6,0xc1,0x82,0xf1,0x07]
+
+v_mqsad_u32_u8 v[5:8], 0.5, 0.5, v[252:255]
+// GFX12: encoding: [0x05,0x00,0x3d,0xd6,0xf0,0xe0,0xf1,0x07]
+
+v_mqsad_u32_u8 v[5:8], src_scc, src_scc, v[252:255]
+// GFX12: encoding: [0x05,0x00,0x3d,0xd6,0xfd,0xfa,0xf1,0x07]
+
+v_mqsad_u32_u8 v[252:255], 0xaf123456, 0xaf123456, v[3:6] clamp
+// GFX12: encoding: [0xfc,0x80,0x3d,0xd6,0xff,0xfe,0x0d,0x04,0x56,0x34,0x12,0xaf]
+
+v_msad_u8 v5, v1, v2, s3
+// GFX12: encoding: [0x05,0x00,0x39,0xd6,0x01,0x05,0x0e,0x00]
+
+v_msad_u8 v5, v255, s2, s105
+// GFX12: encoding: [0x05,0x00,0x39,0xd6,0xff,0x05,0xa4,0x01]
+
+v_msad_u8 v5, s1, v255, exec_hi
+// GFX12: encoding: [0x05,0x00,0x39,0xd6,0x01,0xfe,0xff,0x01]
+
+v_msad_u8 v5, s105, s105, exec_lo
+// GFX12: encoding: [0x05,0x00,0x39,0xd6,0x69,0xd2,0xf8,0x01]
+
+v_msad_u8 v5, vcc_lo, ttmp15, v3
+// GFX12: encoding: [0x05,0x00,0x39,0xd6,0x6a,0xf6,0x0c,0x04]
+
+v_msad_u8 v5, vcc_hi, 0xaf123456, v255
+// GFX12: encoding: [0x05,0x00,0x39,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf]
+
+v_msad_u8 v5, ttmp15, src_scc, ttmp15
+// GFX12: encoding: [0x05,0x00,0x39,0xd6,0x7b,0xfa,0xed,0x01]
+
+v_msad_u8 v5, m0, 0.5, m0
+// GFX12: encoding: [0x05,0x00,0x39,0xd6,0x7d,0xe0,0xf5,0x01]
+
+v_msad_u8 v5, exec_lo, -1, vcc_hi
+// GFX12: encoding: [0x05,0x00,0x39,0xd6,0x7e,0x82,0xad,0x01]
+
+v_msad_u8 v5, exec_hi, null, vcc_lo
+// GFX12: encoding: [0x05,0x00,0x39,0xd6,0x7f,0xf8,0xa8,0x01]
+
+v_msad_u8 v5, null, exec_lo, 0xaf123456
+// GFX12: encoding: [0x05,0x00,0x39,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf]
+
+v_msad_u8 v5, -1, exec_hi, src_scc
+// GFX12: encoding: [0x05,0x00,0x39,0xd6,0xc1,0xfe,0xf4,0x03]
+
+v_msad_u8 v5, 0.5, m0, 0.5
+// GFX12: encoding: [0x05,0x00,0x39,0xd6,0xf0,0xfa,0xc0,0x03]
+
+v_msad_u8 v5, src_scc, vcc_lo, -1
+// GFX12: encoding: [0x05,0x00,0x39,0xd6,0xfd,0xd4,0x04,0x03]
+
+v_msad_u8 v255, 0xaf123456, vcc_hi, null clamp
+// GFX12: encoding: [0xff,0x80,0x39,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf]
+
+v_mul_hi_i32 v5, v1, v2
+// GFX12: encoding: [0x05,0x00,0x2e,0xd7,0x01,0x05,0x02,0x00]
+
+v_mul_hi_i32 v5, v255, v255
+// GFX12: encoding: [0x05,0x00,0x2e,0xd7,0xff,0xff,0x03,0x00]
+
+v_mul_hi_i32 v5, s1, s2
+// GFX12: encoding: [0x05,0x00,0x2e,0xd7,0x01,0x04,0x00,0x00]
+
+v_mul_hi_i32 v5, s105, s105
+// GFX12: encoding: [0x05,0x00,0x2e,0xd7,0x69,0xd2,0x00,0x00]
+
+v_mul_hi_i32 v5, vcc_lo, ttmp15
+// GFX12: encoding: [0x05,0x00,0x2e,0xd7,0x6a,0xf6,0x00,0x00]
+
+v_mul_hi_i32 v5, vcc_hi, 0xaf123456
+// GFX12: encoding: [0x05,0x00,0x2e,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+
+v_mul_hi_i32 v5, ttmp15, src_scc
+// GFX12: encoding: [0x05,0x00,0x2e,0xd7,0x7b,0xfa,0x01,0x00]
+
+v_mul_hi_i32 v5, m0, 0.5
+// GFX12: encoding: [0x05,0x00,0x2e,0xd7,0x7d,0xe0,0x01,0x00]
+
+v_mul_hi_i32 v5, exec_lo, -1
+// GFX12: encoding: [0x05,0x00,0x2e,0xd7,0x7e,0x82,0x01,0x00]
+
+v_mul_hi_i32 v5, exec_hi, null
+// GFX12: encoding: [0x05,0x00,0x2e,0xd7,0x7f,0xf8,0x00,0x00]
+
+v_mul_hi_i32 v5, null, exec_lo
+// GFX12: encoding: [0x05,0x00,0x2e,0xd7,0x7c,0xfc,0x00,0x00]
+
+v_mul_hi_i32 v5, -1, exec_hi
+// GFX12: encoding: [0x05,0x00,0x2e,0xd7,0xc1,0xfe,0x00,0x00]
+
+v_mul_hi_i32 v5, 0.5, m0
+// GFX12: encoding: [0x05,0x00,0x2e,0xd7,0xf0,0xfa,0x00,0x00]
+
+v_mul_hi_i32 v5, src_scc, vcc_lo
+// GFX12: encoding: [0x05,0x00,0x2e,0xd7,0xfd,0xd4,0x00,0x00]
+
+v_mul_hi_i32 v255, 0xaf123456, vcc_hi
+// GFX12: encoding: [0xff,0x00,0x2e,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf]
+
+v_mul_hi_u32 v5, v1, v2
+// GFX12: encoding: [0x05,0x00,0x2d,0xd7,0x01,0x05,0x02,0x00]
+
+v_mul_hi_u32 v5, v255, v255
+// GFX12: encoding: [0x05,0x00,0x2d,0xd7,0xff,0xff,0x03,0x00]
+
+v_mul_hi_u32 v5, s1, s2
+// GFX12: encoding: [0x05,0x00,0x2d,0xd7,0x01,0x04,0x00,0x00]
+
+v_mul_hi_u32 v5, s105, s105
+// GFX12: encoding: [0x05,0x00,0x2d,0xd7,0x69,0xd2,0x00,0x00]
+
+v_mul_hi_u32 v5, vcc_lo, ttmp15
+// GFX12: encoding: [0x05,0x00,0x2d,0xd7,0x6a,0xf6,0x00,0x00]
+
+v_mul_hi_u32 v5, vcc_hi, 0xaf123456
+// GFX12: encoding: [0x05,0x00,0x2d,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+
+v_mul_hi_u32 v5, ttmp15, src_scc
+// GFX12: encoding: [0x05,0x00,0x2d,0xd7,0x7b,0xfa,0x01,0x00]
+
+v_mul_hi_u32 v5, m0, 0.5
+// GFX12: encoding: [0x05,0x00,0x2d,0xd7,0x7d,0xe0,0x01,0x00]
+
+v_mul_hi_u32 v5, exec_lo, -1
+// GFX12: encoding: [0x05,0x00,0x2d,0xd7,0x7e,0x82,0x01,0x00]
+
+v_mul_hi_u32 v5, exec_hi, null
+// GFX12: encoding: [0x05,0x00,0x2d,0xd7,0x7f,0xf8,0x00,0x00]
+
+v_mul_hi_u32 v5, null, exec_lo
+// GFX12: encoding: [0x05,0x00,0x2d,0xd7,0x7c,0xfc,0x00,0x00]
+
+v_mul_hi_u32 v5, -1, exec_hi
+// GFX12: encoding: [0x05,0x00,0x2d,0xd7,0xc1,0xfe,0x00,0x00]
+
+v_mul_hi_u32 v5, 0.5, m0
+// GFX12: encoding: [0x05,0x00,0x2d,0xd7,0xf0,0xfa,0x00,0x00]
+
+v_mul_hi_u32 v5, src_scc, vcc_lo
+// GFX12: encoding: [0x05,0x00,0x2d,0xd7,0xfd,0xd4,0x00,0x00]
+
+v_mul_hi_u32 v255, 0xaf123456, vcc_hi
+// GFX12: encoding: [0xff,0x00,0x2d,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf]
+
+v_mul_lo_u16 v5, v1, v2
+// GFX12: encoding: [0x05,0x00,0x05,0xd7,0x01,0x05,0x02,0x00]
+
+v_mul_lo_u16 v5, v255, v255
+// GFX12: encoding: [0x05,0x00,0x05,0xd7,0xff,0xff,0x03,0x00]
+
+v_mul_lo_u16 v5, s1, s2
+// GFX12: encoding: [0x05,0x00,0x05,0xd7,0x01,0x04,0x00,0x00]
+
+v_mul_lo_u16 v5, s105, s105
+// GFX12: encoding: [0x05,0x00,0x05,0xd7,0x69,0xd2,0x00,0x00]
+
+v_mul_lo_u16 v5, vcc_lo, ttmp15
+// GFX12: encoding: [0x05,0x00,0x05,0xd7,0x6a,0xf6,0x00,0x00]
+
+v_mul_lo_u16 v5, vcc_hi, 0xfe0b
+// GFX12: encoding: [0x05,0x00,0x05,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+
+v_mul_lo_u16 v5, ttmp15, src_scc
+// GFX12: encoding: [0x05,0x00,0x05,0xd7,0x7b,0xfa,0x01,0x00]
+
+v_mul_lo_u16 v5, m0, 0.5
+// GFX12: encoding: [0x05,0x00,0x05,0xd7,0x7d,0xe0,0x01,0x00]
+
+v_mul_lo_u16 v5, exec_lo, -1
+// GFX12: encoding: [0x05,0x00,0x05,0xd7,0x7e,0x82,0x01,0x00]
+
+v_mul_lo_u16 v5, exec_hi, null
+// GFX12: encoding: [0x05,0x00,0x05,0xd7,0x7f,0xf8,0x00,0x00]
+
+v_mul_lo_u16 v5, null, exec_lo
+// GFX12: encoding: [0x05,0x00,0x05,0xd7,0x7c,0xfc,0x00,0x00]
+
+v_mul_lo_u16 v5, -1, exec_hi
+// GFX12: encoding: [0x05,0x00,0x05,0xd7,0xc1,0xfe,0x00,0x00]
+
+v_mul_lo_u16 v5, 0.5, m0
+// GFX12: encoding: [0x05,0x00,0x05,0xd7,0xf0,0xfa,0x00,0x00]
+
+v_mul_lo_u16 v5, src_scc, vcc_lo
+// GFX12: encoding: [0x05,0x00,0x05,0xd7,0xfd,0xd4,0x00,0x00]
+
+v_mul_lo_u16 v255, 0xfe0b, vcc_hi
+// GFX12: encoding: [0xff,0x00,0x05,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+
+v_mul_lo_u32 v5, v1, v2
+// GFX12: encoding: [0x05,0x00,0x2c,0xd7,0x01,0x05,0x02,0x00]
+
+v_mul_lo_u32 v5, v255, v255
+// GFX12: encoding: [0x05,0x00,0x2c,0xd7,0xff,0xff,0x03,0x00]
+
+v_mul_lo_u32 v5, s1, s2
+// GFX12: encoding: [0x05,0x00,0x2c,0xd7,0x01,0x04,0x00,0x00]
+
+v_mul_lo_u32 v5, s105, s105
+// GFX12: encoding: [0x05,0x00,0x2c,0xd7,0x69,0xd2,0x00,0x00]
+
+v_mul_lo_u32 v5, vcc_lo, ttmp15
+// GFX12: encoding: [0x05,0x00,0x2c,0xd7,0x6a,0xf6,0x00,0x00]
+
+v_mul_lo_u32 v5, vcc_hi, 0xaf123456
+// GFX12: encoding: [0x05,0x00,0x2c,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+
+v_mul_lo_u32 v5, ttmp15, src_scc
+// GFX12: encoding: [0x05,0x00,0x2c,0xd7,0x7b,0xfa,0x01,0x00]
+
+v_mul_lo_u32 v5, m0, 0.5
+// GFX12: encoding: [0x05,0x00,0x2c,0xd7,0x7d,0xe0,0x01,0x00]
+
+v_mul_lo_u32 v5, exec_lo, -1
+// GFX12: encoding: [0x05,0x00,0x2c,0xd7,0x7e,0x82,0x01,0x00]
+
+v_mul_lo_u32 v5, exec_hi, null
+// GFX12: encoding: [0x05,0x00,0x2c,0xd7,0x7f,0xf8,0x00,0x00]
+
+v_mul_lo_u32 v5, null, exec_lo
+// GFX12: encoding: [0x05,0x00,0x2c,0xd7,0x7c,0xfc,0x00,0x00]
+
+v_mul_lo_u32 v5, -1, exec_hi
+// GFX12: encoding: [0x05,0x00,0x2c,0xd7,0xc1,0xfe,0x00,0x00]
+
+v_mul_lo_u32 v5, 0.5, m0
+// GFX12: encoding: [0x05,0x00,0x2c,0xd7,0xf0,0xfa,0x00,0x00]
+
+v_mul_lo_u32 v5, src_scc, vcc_lo
+// GFX12: encoding: [0x05,0x00,0x2c,0xd7,0xfd,0xd4,0x00,0x00]
+
+v_mul_lo_u32 v255, 0xaf123456, vcc_hi
+// GFX12: encoding: [0xff,0x00,0x2c,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf]
+
+v_mullit_f32 v5, v1, v2, s3
+// GFX12: encoding: [0x05,0x00,0x18,0xd6,0x01,0x05,0x0e,0x00]
+
+v_mullit_f32 v5, v255, s2, s105
+// GFX12: encoding: [0x05,0x00,0x18,0xd6,0xff,0x05,0xa4,0x01]
+
+v_mullit_f32 v5, s1, v255, exec_hi
+// GFX12: encoding: [0x05,0x00,0x18,0xd6,0x01,0xfe,0xff,0x01]
+
+v_mullit_f32 v5, s105, s105, exec_lo
+// GFX12: encoding: [0x05,0x00,0x18,0xd6,0x69,0xd2,0xf8,0x01]
+
+v_mullit_f32 v5, vcc_lo, ttmp15, v3
+// GFX12: encoding: [0x05,0x00,0x18,0xd6,0x6a,0xf6,0x0c,0x04]
+
+v_mullit_f32 v5, vcc_hi, 0xaf123456, v255
+// GFX12: encoding: [0x05,0x00,0x18,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf]
+
+v_mullit_f32 v5, -|ttmp15|, -|src_scc|, -|ttmp15|
+// GFX12: encoding: [0x05,0x07,0x18,0xd6,0x7b,0xfa,0xed,0xe1]
+
+v_mullit_f32 v5, m0, 0.5, m0
+// GFX12: encoding: [0x05,0x00,0x18,0xd6,0x7d,0xe0,0xf5,0x01]
+
+v_mullit_f32 v5, |exec_lo|, -1, vcc_hi
+// GFX12: encoding: [0x05,0x01,0x18,0xd6,0x7e,0x82,0xad,0x01]
+
+v_mullit_f32 v5, -|exec_hi|, null, -|vcc_lo|
+// GFX12: encoding: [0x05,0x05,0x18,0xd6,0x7f,0xf8,0xa8,0xa1]
+
+v_mullit_f32 v5, null, exec_lo, -|0xaf123456|
+// GFX12: encoding: [0x05,0x04,0x18,0xd6,0x7c,0xfc,0xfc,0x83,0x56,0x34,0x12,0xaf]
+
+v_mullit_f32 v5, -1, -|exec_hi|, -|src_scc|
+// GFX12: encoding: [0x05,0x06,0x18,0xd6,0xc1,0xfe,0xf4,0xc3]
+
+v_mullit_f32 v5, 0.5, -m0, 0.5 mul:2
+// GFX12: encoding: [0x05,0x00,0x18,0xd6,0xf0,0xfa,0xc0,0x4b]
+
+v_mullit_f32 v5, -src_scc, |vcc_lo|, -1 mul:4
+// GFX12: encoding: [0x05,0x02,0x18,0xd6,0xfd,0xd4,0x04,0x33]
+
+v_mullit_f32 v255, -|0xaf123456|, -|vcc_hi|, null clamp div:2
+// GFX12: encoding: [0xff,0x83,0x18,0xd6,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf]
+
+v_or3_b32 v5, v1, v2, s3
+// GFX12: encoding: [0x05,0x00,0x58,0xd6,0x01,0x05,0x0e,0x00]
+
+v_or3_b32 v5, v255, s2, s105
+// GFX12: encoding: [0x05,0x00,0x58,0xd6,0xff,0x05,0xa4,0x01]
+
+v_or3_b32 v5, s1, v255, exec_hi
+// GFX12: encoding: [0x05,0x00,0x58,0xd6,0x01,0xfe,0xff,0x01]
+
+v_or3_b32 v5, s105, s105, exec_lo
+// GFX12: encoding: [0x05,0x00,0x58,0xd6,0x69,0xd2,0xf8,0x01]
+
+v_or3_b32 v5, vcc_lo, ttmp15, v3
+// GFX12: encoding: [0x05,0x00,0x58,0xd6,0x6a,0xf6,0x0c,0x04]
+
+v_or3_b32 v5, vcc_hi, 0xaf123456, v255
+// GFX12: encoding: [0x05,0x00,0x58,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf]
+
+v_or3_b32 v5, ttmp15, src_scc, ttmp15
+// GFX12: encoding: [0x05,0x00,0x58,0xd6,0x7b,0xfa,0xed,0x01]
+
+v_or3_b32 v5, m0, 0.5, m0
+// GFX12: encoding: [0x05,0x00,0x58,0xd6,0x7d,0xe0,0xf5,0x01]
+
+v_or3_b32 v5, exec_lo, -1, vcc_hi
+// GFX12: encoding: [0x05,0x00,0x58,0xd6,0x7e,0x82,0xad,0x01]
+
+v_or3_b32 v5, exec_hi, null, vcc_lo
+// GFX12: encoding: [0x05,0x00,0x58,0xd6,0x7f,0xf8,0xa8,0x01]
+
+v_or3_b32 v5, null, exec_lo, 0xaf123456
+// GFX12: encoding: [0x05,0x00,0x58,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf]
+
+v_or3_b32 v5, -1, exec_hi, src_scc
+// GFX12: encoding: [0x05,0x00,0x58,0xd6,0xc1,0xfe,0xf4,0x03]
+
+v_or3_b32 v5, 0.5, m0, 0.5
+// GFX12: encoding: [0x05,0x00,0x58,0xd6,0xf0,0xfa,0xc0,0x03]
+
+v_or3_b32 v5, src_scc, vcc_lo, -1
+// GFX12: encoding: [0x05,0x00,0x58,0xd6,0xfd,0xd4,0x04,0x03]
+
+v_or3_b32 v255, 0xaf123456, vcc_hi, null
+// GFX12: encoding: [0xff,0x00,0x58,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf]
+
+v_or_b16 v5, v1, v2
+// GFX12: encoding: [0x05,0x00,0x63,0xd7,0x01,0x05,0x02,0x00]
+
+v_or_b16 v5, v255, v255
+// GFX12: encoding: [0x05,0x00,0x63,0xd7,0xff,0xff,0x03,0x00]
+
+v_or_b16 v5, s1, s2
+// GFX12: encoding: [0x05,0x00,0x63,0xd7,0x01,0x04,0x00,0x00]
+
+v_or_b16 v5, s105, s105
+// GFX12: encoding: [0x05,0x00,0x63,0xd7,0x69,0xd2,0x00,0x00]
+
+v_or_b16 v5, vcc_lo, ttmp15
+// GFX12: encoding: [0x05,0x00,0x63,0xd7,0x6a,0xf6,0x00,0x00]
+
+v_or_b16 v5, vcc_hi, 0xfe0b
+// GFX12: encoding: [0x05,0x00,0x63,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+
+v_or_b16 v5, ttmp15, src_scc
+// GFX12: encoding: [0x05,0x00,0x63,0xd7,0x7b,0xfa,0x01,0x00]
+
+v_or_b16 v5, m0, 0.5
+// GFX12: encoding: [0x05,0x00,0x63,0xd7,0x7d,0xe0,0x01,0x00]
+
+v_or_b16 v5, exec_lo, -1
+// GFX12: encoding: [0x05,0x00,0x63,0xd7,0x7e,0x82,0x01,0x00]
+
+v_or_b16 v5, exec_hi, null
+// GFX12: encoding: [0x05,0x00,0x63,0xd7,0x7f,0xf8,0x00,0x00]
+
+v_or_b16 v5, null, exec_lo
+// GFX12: encoding: [0x05,0x00,0x63,0xd7,0x7c,0xfc,0x00,0x00]
+
+v_or_b16 v5, -1, exec_hi
+// GFX12: encoding: [0x05,0x00,0x63,0xd7,0xc1,0xfe,0x00,0x00]
+
+v_or_b16 v5, 0.5, m0
+// GFX12: encoding: [0x05,0x00,0x63,0xd7,0xf0,0xfa,0x00,0x00]
+
+v_or_b16 v5, src_scc, vcc_lo
+// GFX12: encoding: [0x05,0x00,0x63,0xd7,0xfd,0xd4,0x00,0x00]
+
+v_or_b16 v255, 0xfe0b, vcc_hi
+// GFX12: encoding: [0xff,0x00,0x63,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+
+v_pack_b32_f16 v5, v1, v2
+// GFX12: encoding: [0x05,0x00,0x11,0xd7,0x01,0x05,0x02,0x00]
+
+v_pack_b32_f16 v5, v255, v255
+// GFX12: encoding: [0x05,0x00,0x11,0xd7,0xff,0xff,0x03,0x00]
+
+v_pack_b32_f16 v5, s1, s2
+// GFX12: encoding: [0x05,0x00,0x11,0xd7,0x01,0x04,0x00,0x00]
+
+v_pack_b32_f16 v5, s105, s105
+// GFX12: encoding: [0x05,0x00,0x11,0xd7,0x69,0xd2,0x00,0x00]
+
+v_pack_b32_f16 v5, vcc_lo, ttmp15
+// GFX12: encoding: [0x05,0x00,0x11,0xd7,0x6a,0xf6,0x00,0x00]
+
+v_pack_b32_f16 v5, vcc_hi, 0xfe0b
+// GFX12: encoding: [0x05,0x00,0x11,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+
+v_pack_b32_f16 v5, ttmp15, src_scc
+// GFX12: encoding: [0x05,0x00,0x11,0xd7,0x7b,0xfa,0x01,0x00]
+
+v_pack_b32_f16 v5, m0, 0.5
+// GFX12: encoding: [0x05,0x00,0x11,0xd7,0x7d,0xe0,0x01,0x00]
+
+v_pack_b32_f16 v5, exec_lo, -1
+// GFX12: encoding: [0x05,0x00,0x11,0xd7,0x7e,0x82,0x01,0x00]
+
+v_pack_b32_f16 v5, |exec_hi|, null
+// GFX12: encoding: [0x05,0x01,0x11,0xd7,0x7f,0xf8,0x00,0x00]
+
+v_pack_b32_f16 v5, null, exec_lo
+// GFX12: encoding: [0x05,0x00,0x11,0xd7,0x7c,0xfc,0x00,0x00]
+
+v_pack_b32_f16 v5, -1, exec_hi
+// GFX12: encoding: [0x05,0x00,0x11,0xd7,0xc1,0xfe,0x00,0x00]
+
+v_pack_b32_f16 v5, 0.5, -m0 op_sel:[0,0,0]
+// GFX12: encoding: [0x05,0x00,0x11,0xd7,0xf0,0xfa,0x00,0x40]
+
+v_pack_b32_f16 v5, -src_scc, |vcc_lo| op_sel:[1,0,0]
+// GFX12: encoding: [0x05,0x0a,0x11,0xd7,0xfd,0xd4,0x00,0x20]
+
+v_pack_b32_f16 v255, -|0xfe0b|, -|vcc_hi| op_sel:[0,1,0]
+// GFX12: encoding: [0xff,0x13,0x11,0xd7,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00]
+
+v_perm_b32 v5, v1, v2, s3
+// GFX12: encoding: [0x05,0x00,0x44,0xd6,0x01,0x05,0x0e,0x00]
+
+v_perm_b32 v5, v255, s2, s105
+// GFX12: encoding: [0x05,0x00,0x44,0xd6,0xff,0x05,0xa4,0x01]
+
+v_perm_b32 v5, s1, v255, exec_hi
+// GFX12: encoding: [0x05,0x00,0x44,0xd6,0x01,0xfe,0xff,0x01]
+
+v_perm_b32 v5, s105, s105, exec_lo
+// GFX12: encoding: [0x05,0x00,0x44,0xd6,0x69,0xd2,0xf8,0x01]
+
+v_perm_b32 v5, vcc_lo, ttmp15, v3
+// GFX12: encoding: [0x05,0x00,0x44,0xd6,0x6a,0xf6,0x0c,0x04]
+
+v_perm_b32 v5, vcc_hi, 0xaf123456, v255
+// GFX12: encoding: [0x05,0x00,0x44,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf]
+
+v_perm_b32 v5, ttmp15, src_scc, ttmp15
+// GFX12: encoding: [0x05,0x00,0x44,0xd6,0x7b,0xfa,0xed,0x01]
+
+v_perm_b32 v5, m0, 0.5, m0
+// GFX12: encoding: [0x05,0x00,0x44,0xd6,0x7d,0xe0,0xf5,0x01]
+
+v_perm_b32 v5, exec_lo, -1, vcc_hi
+// GFX12: encoding: [0x05,0x00,0x44,0xd6,0x7e,0x82,0xad,0x01]
+
+v_perm_b32 v5, exec_hi, null, vcc_lo
+// GFX12: encoding: [0x05,0x00,0x44,0xd6,0x7f,0xf8,0xa8,0x01]
+
+v_perm_b32 v5, null, exec_lo, 0xaf123456
+// GFX12: encoding: [0x05,0x00,0x44,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf]
+
+v_perm_b32 v5, -1, exec_hi, src_scc
+// GFX12: encoding: [0x05,0x00,0x44,0xd6,0xc1,0xfe,0xf4,0x03]
+
+v_perm_b32 v5, 0.5, m0, 0.5
+// GFX12: encoding: [0x05,0x00,0x44,0xd6,0xf0,0xfa,0xc0,0x03]
+
+v_perm_b32 v5, src_scc, vcc_lo, -1
+// GFX12: encoding: [0x05,0x00,0x44,0xd6,0xfd,0xd4,0x04,0x03]
+
+v_perm_b32 v255, 0xaf123456, vcc_hi, null
+// GFX12: encoding: [0xff,0x00,0x44,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf]
+
+v_permlane16_b32 v5, v1, s2, s3
+// GFX12: encoding: [0x05,0x00,0x5b,0xd6,0x01,0x05,0x0c,0x00]
+
+v_permlane16_b32 v5, v1, s105, s105
+// GFX12: encoding: [0x05,0x00,0x5b,0xd6,0x01,0xd3,0xa4,0x01]
+
+v_permlane16_b32 v5, v1, ttmp15, ttmp15
+// GFX12: encoding: [0x05,0x00,0x5b,0xd6,0x01,0xf7,0xec,0x01]
+
+v_permlane16_b32 v5, v1, vcc_hi, exec_lo
+// GFX12: encoding: [0x05,0x00,0x5b,0xd6,0x01,0xd7,0xf8,0x01]
+
+v_permlane16_b32 v5, v1, vcc_lo, m0
+// GFX12: encoding: [0x05,0x00,0x5b,0xd6,0x01,0xd5,0xf4,0x01]
+
+v_permlane16_b32 v5, v1, m0, vcc_hi
+// GFX12: encoding: [0x05,0x00,0x5b,0xd6,0x01,0xfb,0xac,0x01]
+
+v_permlane16_b32 v5, v1, exec_hi, vcc_lo
+// GFX12: encoding: [0x05,0x00,0x5b,0xd6,0x01,0xff,0xa8,0x01]
+
+v_permlane16_b32 v5, v1, exec_lo, src_scc
+// GFX12: encoding: [0x05,0x00,0x5b,0xd6,0x01,0xfd,0xf4,0x03]
+
+v_permlane16_b32 v5, v1, null, 0.5 op_sel:[1,1]
+// GFX12: encoding: [0x05,0x18,0x5b,0xd6,0x01,0xf9,0xc0,0x03]
+
+v_permlane16_b32 v5, v1, -1, -1 op_sel:[0,0]
+// GFX12: encoding: [0x05,0x00,0x5b,0xd6,0x01,0x83,0x05,0x03]
+
+v_permlane16_b32 v5, v1, 0.5, null op_sel:[1,0]
+// GFX12: encoding: [0x05,0x08,0x5b,0xd6,0x01,0xe1,0xf1,0x01]
+
+v_permlane16_b32 v255, v255, src_scc, exec_hi op_sel:[0,1]
+// GFX12: encoding: [0xff,0x10,0x5b,0xd6,0xff,0xfb,0xfd,0x01]
+
+v_permlanex16_b32 v5, v1, s2, s3
+// GFX12: encoding: [0x05,0x00,0x5c,0xd6,0x01,0x05,0x0c,0x00]
+
+v_permlanex16_b32 v5, v1, s105, s105
+// GFX12: encoding: [0x05,0x00,0x5c,0xd6,0x01,0xd3,0xa4,0x01]
+
+v_permlanex16_b32 v5, v1, ttmp15, ttmp15
+// GFX12: encoding: [0x05,0x00,0x5c,0xd6,0x01,0xf7,0xec,0x01]
+
+v_permlanex16_b32 v5, v1, vcc_hi, exec_lo
+// GFX12: encoding: [0x05,0x00,0x5c,0xd6,0x01,0xd7,0xf8,0x01]
+
+v_permlanex16_b32 v5, v1, vcc_lo, m0
+// GFX12: encoding: [0x05,0x00,0x5c,0xd6,0x01,0xd5,0xf4,0x01]
+
+v_permlanex16_b32 v5, v1, m0, vcc_hi
+// GFX12: encoding: [0x05,0x00,0x5c,0xd6,0x01,0xfb,0xac,0x01]
+
+v_permlanex16_b32 v5, v1, exec_hi, vcc_lo
+// GFX12: encoding: [0x05,0x00,0x5c,0xd6,0x01,0xff,0xa8,0x01]
+
+v_permlanex16_b32 v5, v1, exec_lo, src_scc
+// GFX12: encoding: [0x05,0x00,0x5c,0xd6,0x01,0xfd,0xf4,0x03]
+
+v_permlanex16_b32 v5, v1, null, 0.5 op_sel:[1,1]
+// GFX12: encoding: [0x05,0x18,0x5c,0xd6,0x01,0xf9,0xc0,0x03]
+
+v_permlanex16_b32 v5, v1, -1, -1 op_sel:[0,0]
+// GFX12: encoding: [0x05,0x00,0x5c,0xd6,0x01,0x83,0x05,0x03]
+
+v_permlanex16_b32 v5, v1, 0.5, null op_sel:[1,0]
+// GFX12: encoding: [0x05,0x08,0x5c,0xd6,0x01,0xe1,0xf1,0x01]
+
+v_permlanex16_b32 v255, v255, src_scc, exec_hi op_sel:[0,1]
+// GFX12: encoding: [0xff,0x10,0x5c,0xd6,0xff,0xfb,0xfd,0x01]
+
+v_permlane16_var_b32 v5, v1, v2
+// GFX12: encoding: [0x05,0x00,0x0f,0xd7,0x01,0x05,0x02,0x00]
+
+v_permlane16_var_b32 v5, v1, v255
+// GFX12: encoding: [0x05,0x00,0x0f,0xd7,0x01,0xff,0x03,0x00]
+
+v_permlane16_var_b32 v5, v255, v0
+// GFX12: encoding: [0x05,0x00,0x0f,0xd7,0xff,0x01,0x02,0x00]
+
+v_permlane16_var_b32 v255, v1, v2
+// GFX12: encoding: [0xff,0x00,0x0f,0xd7,0x01,0x05,0x02,0x00]
+
+v_permlane16_var_b32 v5, v1, v50, op_sel:[1,1]
+// GFX12: encoding: [0x05,0x18,0x0f,0xd7,0x01,0x65,0x02,0x00]
+
+v_permlane16_var_b32 v5, v1, v50, op_sel:[0,0]
+// GFX12: encoding: [0x05,0x00,0x0f,0xd7,0x01,0x65,0x02,0x00]
+
+v_permlane16_var_b32 v5, v1, v50, op_sel:[1,0]
+// GFX12: encoding: [0x05,0x08,0x0f,0xd7,0x01,0x65,0x02,0x00]
+
+v_permlane16_var_b32 v255, v255, v0, op_sel:[0,1]
+// GFX12: encoding: [0xff,0x10,0x0f,0xd7,0xff,0x01,0x02,0x00]
+
+v_permlanex16_var_b32 v5, v1, v2
+// GFX12: encoding: [0x05,0x00,0x10,0xd7,0x01,0x05,0x02,0x00]
+
+v_permlanex16_var_b32 v5, v1, v105
+// GFX12: encoding: [0x05,0x00,0x10,0xd7,0x01,0xd3,0x02,0x00]
+
+v_permlanex16_var_b32 v5, v1, v255
+// GFX12: encoding: [0x05,0x00,0x10,0xd7,0x01,0xff,0x03,0x00]
+
+v_permlanex16_var_b32 v255, v1, v2
+// GFX12: encoding: [0xff,0x00,0x10,0xd7,0x01,0x05,0x02,0x00]
+
+v_permlanex16_var_b32 v1, v255, v2
+// GFX12: encoding: [0x01,0x00,0x10,0xd7,0xff,0x05,0x02,0x00]
+
+v_permlanex16_var_b32 v5, v1, v100, op_sel:[1,1]
+// GFX12: encoding: [0x05,0x18,0x10,0xd7,0x01,0xc9,0x02,0x00]
+
+v_permlanex16_var_b32 v5, v1, v100, op_sel:[0,0]
+// GFX12: encoding: [0x05,0x00,0x10,0xd7,0x01,0xc9,0x02,0x00]
+
+v_permlanex16_var_b32 v5, v1, v100, op_sel:[1,0]
+// GFX12: encoding: [0x05,0x08,0x10,0xd7,0x01,0xc9,0x02,0x00]
+
+v_permlanex16_var_b32 v255, v255, v100, op_sel:[0,1]
+// GFX12: encoding: [0xff,0x10,0x10,0xd7,0xff,0xc9,0x02,0x00]
+
+v_qsad_pk_u16_u8 v[5:6], v[1:2], v2, ttmp[14:15]
+// GFX12: encoding: [0x05,0x00,0x3a,0xd6,0x01,0x05,0xea,0x01]
+
+v_qsad_pk_u16_u8 v[5:6], v[1:2], v255, ttmp[14:15]
+// GFX12: encoding: [0x05,0x00,0x3a,0xd6,0x01,0xff,0xeb,0x01]
+
+v_qsad_pk_u16_u8 v[5:6], v[1:2], s2, ttmp[14:15]
+// GFX12: encoding: [0x05,0x00,0x3a,0xd6,0x01,0x05,0xe8,0x01]
+
+v_qsad_pk_u16_u8 v[5:6], v[1:2], s105, ttmp[14:15]
+// GFX12: encoding: [0x05,0x00,0x3a,0xd6,0x01,0xd3,0xe8,0x01]
+
+v_qsad_pk_u16_u8 v[5:6], v[254:255], ttmp15, s[6:7]
+// GFX12: encoding: [0x05,0x00,0x3a,0xd6,0xfe,0xf7,0x18,0x00]
+
+v_qsad_pk_u16_u8 v[5:6], s[2:3], vcc_hi, v[3:4]
+// GFX12: encoding: [0x05,0x00,0x3a,0xd6,0x02,0xd6,0x0c,0x04]
+
+v_qsad_pk_u16_u8 v[5:6], s[104:105], vcc_lo, s[104:105]
+// GFX12: encoding: [0x05,0x00,0x3a,0xd6,0x68,0xd4,0xa0,0x01]
+
+v_qsad_pk_u16_u8 v[5:6], vcc, m0, v[254:255]
+// GFX12: encoding: [0x05,0x00,0x3a,0xd6,0x6a,0xfa,0xf8,0x07]
+
+v_qsad_pk_u16_u8 v[5:6], ttmp[14:15], exec_hi, null
+// GFX12: encoding: [0x05,0x00,0x3a,0xd6,0x7a,0xfe,0xf0,0x01]
+
+v_qsad_pk_u16_u8 v[5:6], exec, exec_lo, exec
+// GFX12: encoding: [0x05,0x00,0x3a,0xd6,0x7e,0xfc,0xf8,0x01]
+
+v_qsad_pk_u16_u8 v[5:6], null, null, vcc
+// GFX12: encoding: [0x05,0x00,0x3a,0xd6,0x7c,0xf8,0xa8,0x01]
+
+v_qsad_pk_u16_u8 v[5:6], -1, -1, 0xaf123456
+// GFX12: encoding: [0x05,0x00,0x3a,0xd6,0xc1,0x82,0xfd,0x03,0x56,0x34,0x12,0xaf]
+
+v_qsad_pk_u16_u8 v[5:6], 0.5, 0.5, src_scc
+// GFX12: encoding: [0x05,0x00,0x3a,0xd6,0xf0,0xe0,0xf5,0x03]
+
+v_qsad_pk_u16_u8 v[5:6], src_scc, src_scc, 0.5
+// GFX12: encoding: [0x05,0x00,0x3a,0xd6,0xfd,0xfa,0xc1,0x03]
+
+v_qsad_pk_u16_u8 v[254:255], 0xaf123456, 0xaf123456, -1 clamp
+// GFX12: encoding: [0xfe,0x80,0x3a,0xd6,0xff,0xfe,0x05,0x03,0x56,0x34,0x12,0xaf]
+
+v_readlane_b32 s5, v1, s2
+// GFX12: encoding: [0x05,0x00,0x60,0xd7,0x01,0x05,0x00,0x00]
+
+v_readlane_b32 s5, v1, s105
+// GFX12: encoding: [0x05,0x00,0x60,0xd7,0x01,0xd3,0x00,0x00]
+
+v_readlane_b32 s105, v1, ttmp15
+// GFX12: encoding: [0x69,0x00,0x60,0xd7,0x01,0xf7,0x00,0x00]
+
+v_readlane_b32 vcc_lo, v1, vcc_hi
+// GFX12: encoding: [0x6a,0x00,0x60,0xd7,0x01,0xd7,0x00,0x00]
+
+v_readlane_b32 vcc_hi, v1, vcc_lo
+// GFX12: encoding: [0x6b,0x00,0x60,0xd7,0x01,0xd5,0x00,0x00]
+
+v_readlane_b32 ttmp15, v1, m0
+// GFX12: encoding: [0x7b,0x00,0x60,0xd7,0x01,0xfb,0x00,0x00]
+
+v_readlane_b32 null, v255, null
+// GFX12: encoding: [0x7c,0x00,0x60,0xd7,0xff,0xf9,0x00,0x00]
+
+v_sad_hi_u8 v5, v1, v2, s3
+// GFX12: encoding: [0x05,0x00,0x23,0xd6,0x01,0x05,0x0e,0x00]
+
+v_sad_hi_u8 v5, v255, s2, s105
+// GFX12: encoding: [0x05,0x00,0x23,0xd6,0xff,0x05,0xa4,0x01]
+
+v_sad_hi_u8 v5, s1, v255, exec_hi
+// GFX12: encoding: [0x05,0x00,0x23,0xd6,0x01,0xfe,0xff,0x01]
+
+v_sad_hi_u8 v5, s105, s105, exec_lo
+// GFX12: encoding: [0x05,0x00,0x23,0xd6,0x69,0xd2,0xf8,0x01]
+
+v_sad_hi_u8 v5, vcc_lo, ttmp15, v3
+// GFX12: encoding: [0x05,0x00,0x23,0xd6,0x6a,0xf6,0x0c,0x04]
+
+v_sad_hi_u8 v5, vcc_hi, 0xaf123456, v255
+// GFX12: encoding: [0x05,0x00,0x23,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf]
+
+v_sad_hi_u8 v5, ttmp15, src_scc, ttmp15
+// GFX12: encoding: [0x05,0x00,0x23,0xd6,0x7b,0xfa,0xed,0x01]
+
+v_sad_hi_u8 v5, m0, 0.5, m0
+// GFX12: encoding: [0x05,0x00,0x23,0xd6,0x7d,0xe0,0xf5,0x01]
+
+v_sad_hi_u8 v5, exec_lo, -1, vcc_hi
+// GFX12: encoding: [0x05,0x00,0x23,0xd6,0x7e,0x82,0xad,0x01]
+
+v_sad_hi_u8 v5, exec_hi, null, vcc_lo
+// GFX12: encoding: [0x05,0x00,0x23,0xd6,0x7f,0xf8,0xa8,0x01]
+
+v_sad_hi_u8 v5, null, exec_lo, 0xaf123456
+// GFX12: encoding: [0x05,0x00,0x23,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf]
+
+v_sad_hi_u8 v5, -1, exec_hi, src_scc
+// GFX12: encoding: [0x05,0x00,0x23,0xd6,0xc1,0xfe,0xf4,0x03]
+
+v_sad_hi_u8 v5, 0.5, m0, 0.5
+// GFX12: encoding: [0x05,0x00,0x23,0xd6,0xf0,0xfa,0xc0,0x03]
+
+v_sad_hi_u8 v5, src_scc, vcc_lo, -1
+// GFX12: encoding: [0x05,0x00,0x23,0xd6,0xfd,0xd4,0x04,0x03]
+
+v_sad_hi_u8 v255, 0xaf123456, vcc_hi, null clamp
+// GFX12: encoding: [0xff,0x80,0x23,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf]
+
+v_sad_u16 v5, v1, v2, s3
+// GFX12: encoding: [0x05,0x00,0x24,0xd6,0x01,0x05,0x0e,0x00]
+
+v_sad_u16 v5, v255, s2, s105
+// GFX12: encoding: [0x05,0x00,0x24,0xd6,0xff,0x05,0xa4,0x01]
+
+v_sad_u16 v5, s1, v255, exec_hi
+// GFX12: encoding: [0x05,0x00,0x24,0xd6,0x01,0xfe,0xff,0x01]
+
+v_sad_u16 v5, s105, s105, exec_lo
+// GFX12: encoding: [0x05,0x00,0x24,0xd6,0x69,0xd2,0xf8,0x01]
+
+v_sad_u16 v5, vcc_lo, ttmp15, v3
+// GFX12: encoding: [0x05,0x00,0x24,0xd6,0x6a,0xf6,0x0c,0x04]
+
+v_sad_u16 v5, vcc_hi, 0xfe0b, v255
+// GFX12: encoding: [0x05,0x00,0x24,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00]
+
+v_sad_u16 v5, ttmp15, src_scc, ttmp15
+// GFX12: encoding: [0x05,0x00,0x24,0xd6,0x7b,0xfa,0xed,0x01]
+
+v_sad_u16 v5, m0, 0.5, m0
+// GFX12: encoding: [0x05,0x00,0x24,0xd6,0x7d,0xe0,0xf5,0x01]
+
+v_sad_u16 v5, exec_lo, -1, vcc_hi
+// GFX12: encoding: [0x05,0x00,0x24,0xd6,0x7e,0x82,0xad,0x01]
+
+v_sad_u16 v5, exec_hi, null, vcc_lo
+// GFX12: encoding: [0x05,0x00,0x24,0xd6,0x7f,0xf8,0xa8,0x01]
+
+v_sad_u16 v5, null, exec_lo, 0xaf123456
+// GFX12: encoding: [0x05,0x00,0x24,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf]
+
+v_sad_u16 v5, -1, exec_hi, src_scc
+// GFX12: encoding: [0x05,0x00,0x24,0xd6,0xc1,0xfe,0xf4,0x03]
+
+v_sad_u16 v5, 0.5, m0, 0.5
+// GFX12: encoding: [0x05,0x00,0x24,0xd6,0xf0,0xfa,0xc0,0x03]
+
+v_sad_u16 v5, src_scc, vcc_lo, -1
+// GFX12: encoding: [0x05,0x00,0x24,0xd6,0xfd,0xd4,0x04,0x03]
+
+v_sad_u16 v255, 0xfe0b, vcc_hi, null clamp
+// GFX12: encoding: [0xff,0x80,0x24,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00]
+
+v_sad_u32 v5, v1, v2, s3
+// GFX12: encoding: [0x05,0x00,0x25,0xd6,0x01,0x05,0x0e,0x00]
+
+v_sad_u32 v5, v255, s2, s105
+// GFX12: encoding: [0x05,0x00,0x25,0xd6,0xff,0x05,0xa4,0x01]
+
+v_sad_u32 v5, s1, v255, exec_hi
+// GFX12: encoding: [0x05,0x00,0x25,0xd6,0x01,0xfe,0xff,0x01]
+
+v_sad_u32 v5, s105, s105, exec_lo
+// GFX12: encoding: [0x05,0x00,0x25,0xd6,0x69,0xd2,0xf8,0x01]
+
+v_sad_u32 v5, vcc_lo, ttmp15, v3
+// GFX12: encoding: [0x05,0x00,0x25,0xd6,0x6a,0xf6,0x0c,0x04]
+
+v_sad_u32 v5, vcc_hi, 0xaf123456, v255
+// GFX12: encoding: [0x05,0x00,0x25,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf]
+
+v_sad_u32 v5, ttmp15, src_scc, ttmp15
+// GFX12: encoding: [0x05,0x00,0x25,0xd6,0x7b,0xfa,0xed,0x01]
+
+v_sad_u32 v5, m0, 0.5, m0
+// GFX12: encoding: [0x05,0x00,0x25,0xd6,0x7d,0xe0,0xf5,0x01]
+
+v_sad_u32 v5, exec_lo, -1, vcc_hi
+// GFX12: encoding: [0x05,0x00,0x25,0xd6,0x7e,0x82,0xad,0x01]
+
+v_sad_u32 v5, exec_hi, null, vcc_lo
+// GFX12: encoding: [0x05,0x00,0x25,0xd6,0x7f,0xf8,0xa8,0x01]
+
+v_sad_u32 v5, null, exec_lo, 0xaf123456
+// GFX12: encoding: [0x05,0x00,0x25,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf]
+
+v_sad_u32 v5, -1, exec_hi, src_scc
+// GFX12: encoding: [0x05,0x00,0x25,0xd6,0xc1,0xfe,0xf4,0x03]
+
+v_sad_u32 v5, 0.5, m0, 0.5
+// GFX12: encoding: [0x05,0x00,0x25,0xd6,0xf0,0xfa,0xc0,0x03]
+
+v_sad_u32 v5, src_scc, vcc_lo, -1
+// GFX12: encoding: [0x05,0x00,0x25,0xd6,0xfd,0xd4,0x04,0x03]
+
+v_sad_u32 v255, 0xaf123456, vcc_hi, null clamp
+// GFX12: encoding: [0xff,0x80,0x25,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf]
+
+v_sad_u8 v5, v1, v2, s3
+// GFX12: encoding: [0x05,0x00,0x22,0xd6,0x01,0x05,0x0e,0x00]
+
+v_sad_u8 v5, v255, s2, s105
+// GFX12: encoding: [0x05,0x00,0x22,0xd6,0xff,0x05,0xa4,0x01]
+
+v_sad_u8 v5, s1, v255, exec_hi
+// GFX12: encoding: [0x05,0x00,0x22,0xd6,0x01,0xfe,0xff,0x01]
+
+v_sad_u8 v5, s105, s105, exec_lo
+// GFX12: encoding: [0x05,0x00,0x22,0xd6,0x69,0xd2,0xf8,0x01]
+
+v_sad_u8 v5, vcc_lo, ttmp15, v3
+// GFX12: encoding: [0x05,0x00,0x22,0xd6,0x6a,0xf6,0x0c,0x04]
+
+v_sad_u8 v5, vcc_hi, 0xaf123456, v255
+// GFX12: encoding: [0x05,0x00,0x22,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf]
+
+v_sad_u8 v5, ttmp15, src_scc, ttmp15
+// GFX12: encoding: [0x05,0x00,0x22,0xd6,0x7b,0xfa,0xed,0x01]
+
+v_sad_u8 v5, m0, 0.5, m0
+// GFX12: encoding: [0x05,0x00,0x22,0xd6,0x7d,0xe0,0xf5,0x01]
+
+v_sad_u8 v5, exec_lo, -1, vcc_hi
+// GFX12: encoding: [0x05,0x00,0x22,0xd6,0x7e,0x82,0xad,0x01]
+
+v_sad_u8 v5, exec_hi, null, vcc_lo
+// GFX12: encoding: [0x05,0x00,0x22,0xd6,0x7f,0xf8,0xa8,0x01]
+
+v_sad_u8 v5, null, exec_lo, 0xaf123456
+// GFX12: encoding: [0x05,0x00,0x22,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf]
+
+v_sad_u8 v5, -1, exec_hi, src_scc
+// GFX12: encoding: [0x05,0x00,0x22,0xd6,0xc1,0xfe,0xf4,0x03]
+
+v_sad_u8 v5, 0.5, m0, 0.5
+// GFX12: encoding: [0x05,0x00,0x22,0xd6,0xf0,0xfa,0xc0,0x03]
+
+v_sad_u8 v5, src_scc, vcc_lo, -1
+// GFX12: encoding: [0x05,0x00,0x22,0xd6,0xfd,0xd4,0x04,0x03]
+
+v_sad_u8 v255, 0xaf123456, vcc_hi, null clamp
+// GFX12: encoding: [0xff,0x80,0x22,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf]
+
+v_sub_co_u32 v5, s6, v1, v2
+// W32: encoding: [0x05,0x06,0x01,0xd7,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_sub_co_u32 v5, s6, v255, v255
+// W32: encoding: [0x05,0x06,0x01,0xd7,0xff,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_sub_co_u32 v5, s6, s1, s2
+// W32: encoding: [0x05,0x06,0x01,0xd7,0x01,0x04,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_sub_co_u32 v5, s6, s105, s105
+// W32: encoding: [0x05,0x06,0x01,0xd7,0x69,0xd2,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_sub_co_u32 v5, s6, vcc_lo, ttmp15
+// W32: encoding: [0x05,0x06,0x01,0xd7,0x6a,0xf6,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_sub_co_u32 v5, s6, vcc_hi, 0xaf123456
+// W32: encoding: [0x05,0x06,0x01,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_sub_co_u32 v5, s6, ttmp15, src_scc
+// W32: encoding: [0x05,0x06,0x01,0xd7,0x7b,0xfa,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_sub_co_u32 v5, s6, m0, 0.5
+// W32: encoding: [0x05,0x06,0x01,0xd7,0x7d,0xe0,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_sub_co_u32 v5, s6, exec_lo, -1
+// W32: encoding: [0x05,0x06,0x01,0xd7,0x7e,0x82,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_sub_co_u32 v5, s6, exec_hi, null
+// W32: encoding: [0x05,0x06,0x01,0xd7,0x7f,0xf8,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_sub_co_u32 v5, s105, null, exec_lo
+// W32: encoding: [0x05,0x69,0x01,0xd7,0x7c,0xfc,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_sub_co_u32 v5, vcc_lo, -1, exec_hi
+// W32: encoding: [0x05,0x6a,0x01,0xd7,0xc1,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_sub_co_u32 v5, vcc_hi, 0.5, m0
+// W32: encoding: [0x05,0x6b,0x01,0xd7,0xf0,0xfa,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_sub_co_u32 v5, ttmp15, src_scc, vcc_lo
+// W32: encoding: [0x05,0x7b,0x01,0xd7,0xfd,0xd4,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_sub_co_u32 v5, s[12:13], v1, v2
+// W64: encoding: [0x05,0x0c,0x01,0xd7,0x01,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_sub_co_u32 v5, s[12:13], v255, v255
+// W64: encoding: [0x05,0x0c,0x01,0xd7,0xff,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_sub_co_u32 v5, s[12:13], s1, s2
+// W64: encoding: [0x05,0x0c,0x01,0xd7,0x01,0x04,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_sub_co_u32 v5, s[12:13], s105, s105
+// W64: encoding: [0x05,0x0c,0x01,0xd7,0x69,0xd2,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_sub_co_u32 v5, s[12:13], vcc_lo, ttmp15
+// W64: encoding: [0x05,0x0c,0x01,0xd7,0x6a,0xf6,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_sub_co_u32 v5, s[12:13], vcc_hi, 0xaf123456
+// W64: encoding: [0x05,0x0c,0x01,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_sub_co_u32 v5, s[12:13], ttmp15, src_scc
+// W64: encoding: [0x05,0x0c,0x01,0xd7,0x7b,0xfa,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_sub_co_u32 v5, s[12:13], m0, 0.5
+// W64: encoding: [0x05,0x0c,0x01,0xd7,0x7d,0xe0,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_sub_co_u32 v5, s[12:13], exec_lo, -1
+// W64: encoding: [0x05,0x0c,0x01,0xd7,0x7e,0x82,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_sub_co_u32 v5, s[12:13], exec_hi, null
+// W64: encoding: [0x05,0x0c,0x01,0xd7,0x7f,0xf8,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_sub_co_u32 v5, s[12:13], null, exec_lo
+// W64: encoding: [0x05,0x0c,0x01,0xd7,0x7c,0xfc,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_sub_co_u32 v5, s[104:105], -1, exec_hi
+// W64: encoding: [0x05,0x68,0x01,0xd7,0xc1,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_sub_co_u32 v5, vcc, 0.5, m0
+// W64: encoding: [0x05,0x6a,0x01,0xd7,0xf0,0xfa,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_sub_co_u32 v5, ttmp[14:15], src_scc, vcc_lo
+// W64: encoding: [0x05,0x7a,0x01,0xd7,0xfd,0xd4,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_sub_co_u32 v255, null, 0xaf123456, vcc_hi clamp
+// GFX12: encoding: [0xff,0xfc,0x01,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf]
+
+v_sub_nc_i16 v5, v1, v2
+// GFX12: encoding: [0x05,0x00,0x0e,0xd7,0x01,0x05,0x02,0x00]
+
+v_sub_nc_i16 v5, v255, v255
+// GFX12: encoding: [0x05,0x00,0x0e,0xd7,0xff,0xff,0x03,0x00]
+
+v_sub_nc_i16 v5, s1, s2
+// GFX12: encoding: [0x05,0x00,0x0e,0xd7,0x01,0x04,0x00,0x00]
+
+v_sub_nc_i16 v5, s105, s105
+// GFX12: encoding: [0x05,0x00,0x0e,0xd7,0x69,0xd2,0x00,0x00]
+
+v_sub_nc_i16 v5, vcc_lo, ttmp15
+// GFX12: encoding: [0x05,0x00,0x0e,0xd7,0x6a,0xf6,0x00,0x00]
+
+v_sub_nc_i16 v5, vcc_hi, 0xfe0b
+// GFX12: encoding: [0x05,0x00,0x0e,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+
+v_sub_nc_i16 v5, ttmp15, src_scc
+// GFX12: encoding: [0x05,0x00,0x0e,0xd7,0x7b,0xfa,0x01,0x00]
+
+v_sub_nc_i16 v5, m0, 0.5
+// GFX12: encoding: [0x05,0x00,0x0e,0xd7,0x7d,0xe0,0x01,0x00]
+
+v_sub_nc_i16 v5, exec_lo, -1
+// GFX12: encoding: [0x05,0x00,0x0e,0xd7,0x7e,0x82,0x01,0x00]
+
+v_sub_nc_i16 v5, exec_hi, null
+// GFX12: encoding: [0x05,0x00,0x0e,0xd7,0x7f,0xf8,0x00,0x00]
+
+v_sub_nc_i16 v5, null, exec_lo op_sel:[1,1,1]
+// GFX12: encoding: [0x05,0x58,0x0e,0xd7,0x7c,0xfc,0x00,0x00]
+
+v_sub_nc_i16 v5, -1, exec_hi op_sel:[0,0,0]
+// GFX12: encoding: [0x05,0x00,0x0e,0xd7,0xc1,0xfe,0x00,0x00]
+
+v_sub_nc_i16 v5, 0.5, m0 op_sel:[1,0,0]
+// GFX12: encoding: [0x05,0x08,0x0e,0xd7,0xf0,0xfa,0x00,0x00]
+
+v_sub_nc_i16 v5, src_scc, vcc_lo op_sel:[0,1,0]
+// GFX12: encoding: [0x05,0x10,0x0e,0xd7,0xfd,0xd4,0x00,0x00]
+
+v_sub_nc_i16 v255, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp
+// GFX12: encoding: [0xff,0xc0,0x0e,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+
+v_sub_nc_i32 v5, v1, v2
+// GFX12: encoding: [0x05,0x00,0x25,0xd7,0x01,0x05,0x02,0x00]
+
+v_sub_nc_i32 v5, v255, v255
+// GFX12: encoding: [0x05,0x00,0x25,0xd7,0xff,0xff,0x03,0x00]
+
+v_sub_nc_i32 v5, s1, s2
+// GFX12: encoding: [0x05,0x00,0x25,0xd7,0x01,0x04,0x00,0x00]
+
+v_sub_nc_i32 v5, s105, s105
+// GFX12: encoding: [0x05,0x00,0x25,0xd7,0x69,0xd2,0x00,0x00]
+
+v_sub_nc_i32 v5, vcc_lo, ttmp15
+// GFX12: encoding: [0x05,0x00,0x25,0xd7,0x6a,0xf6,0x00,0x00]
+
+v_sub_nc_i32 v5, vcc_hi, 0xaf123456
+// GFX12: encoding: [0x05,0x00,0x25,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+
+v_sub_nc_i32 v5, ttmp15, src_scc
+// GFX12: encoding: [0x05,0x00,0x25,0xd7,0x7b,0xfa,0x01,0x00]
+
+v_sub_nc_i32 v5, m0, 0.5
+// GFX12: encoding: [0x05,0x00,0x25,0xd7,0x7d,0xe0,0x01,0x00]
+
+v_sub_nc_i32 v5, exec_lo, -1
+// GFX12: encoding: [0x05,0x00,0x25,0xd7,0x7e,0x82,0x01,0x00]
+
+v_sub_nc_i32 v5, exec_hi, null
+// GFX12: encoding: [0x05,0x00,0x25,0xd7,0x7f,0xf8,0x00,0x00]
+
+v_sub_nc_i32 v5, null, exec_lo
+// GFX12: encoding: [0x05,0x00,0x25,0xd7,0x7c,0xfc,0x00,0x00]
+
+v_sub_nc_i32 v5, -1, exec_hi
+// GFX12: encoding: [0x05,0x00,0x25,0xd7,0xc1,0xfe,0x00,0x00]
+
+v_sub_nc_i32 v5, 0.5, m0
+// GFX12: encoding: [0x05,0x00,0x25,0xd7,0xf0,0xfa,0x00,0x00]
+
+v_sub_nc_i32 v5, src_scc, vcc_lo
+// GFX12: encoding: [0x05,0x00,0x25,0xd7,0xfd,0xd4,0x00,0x00]
+
+v_sub_nc_i32 v255, 0xaf123456, vcc_hi clamp
+// GFX12: encoding: [0xff,0x80,0x25,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf]
+
+v_sub_nc_u16 v5, v1, v2
+// GFX12: encoding: [0x05,0x00,0x04,0xd7,0x01,0x05,0x02,0x00]
+
+v_sub_nc_u16 v5, v255, v255
+// GFX12: encoding: [0x05,0x00,0x04,0xd7,0xff,0xff,0x03,0x00]
+
+v_sub_nc_u16 v5, s1, s2
+// GFX12: encoding: [0x05,0x00,0x04,0xd7,0x01,0x04,0x00,0x00]
+
+v_sub_nc_u16 v5, s105, s105
+// GFX12: encoding: [0x05,0x00,0x04,0xd7,0x69,0xd2,0x00,0x00]
+
+v_sub_nc_u16 v5, vcc_lo, ttmp15
+// GFX12: encoding: [0x05,0x00,0x04,0xd7,0x6a,0xf6,0x00,0x00]
+
+v_sub_nc_u16 v5, vcc_hi, 0xfe0b
+// GFX12: encoding: [0x05,0x00,0x04,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+
+v_sub_nc_u16 v5, ttmp15, src_scc
+// GFX12: encoding: [0x05,0x00,0x04,0xd7,0x7b,0xfa,0x01,0x00]
+
+v_sub_nc_u16 v5, m0, 0.5
+// GFX12: encoding: [0x05,0x00,0x04,0xd7,0x7d,0xe0,0x01,0x00]
+
+v_sub_nc_u16 v5, exec_lo, -1
+// GFX12: encoding: [0x05,0x00,0x04,0xd7,0x7e,0x82,0x01,0x00]
+
+v_sub_nc_u16 v5, exec_hi, null
+// GFX12: encoding: [0x05,0x00,0x04,0xd7,0x7f,0xf8,0x00,0x00]
+
+v_sub_nc_u16 v5, null, exec_lo op_sel:[1,1,1]
+// GFX12: encoding: [0x05,0x58,0x04,0xd7,0x7c,0xfc,0x00,0x00]
+
+v_sub_nc_u16 v5, -1, exec_hi op_sel:[0,0,0]
+// GFX12: encoding: [0x05,0x00,0x04,0xd7,0xc1,0xfe,0x00,0x00]
+
+v_sub_nc_u16 v5, 0.5, m0 op_sel:[1,0,0]
+// GFX12: encoding: [0x05,0x08,0x04,0xd7,0xf0,0xfa,0x00,0x00]
+
+v_sub_nc_u16 v5, src_scc, vcc_lo op_sel:[0,1,0]
+// GFX12: encoding: [0x05,0x10,0x04,0xd7,0xfd,0xd4,0x00,0x00]
+
+v_sub_nc_u16 v255, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp
+// GFX12: encoding: [0xff,0xc0,0x04,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+
+v_subrev_co_u32 v5, s6, v1, v2
+// W32: encoding: [0x05,0x06,0x02,0xd7,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_subrev_co_u32 v5, s6, v255, v255
+// W32: encoding: [0x05,0x06,0x02,0xd7,0xff,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_subrev_co_u32 v5, s6, s1, s2
+// W32: encoding: [0x05,0x06,0x02,0xd7,0x01,0x04,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_subrev_co_u32 v5, s6, s105, s105
+// W32: encoding: [0x05,0x06,0x02,0xd7,0x69,0xd2,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_subrev_co_u32 v5, s6, vcc_lo, ttmp15
+// W32: encoding: [0x05,0x06,0x02,0xd7,0x6a,0xf6,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_subrev_co_u32 v5, s6, vcc_hi, 0xaf123456
+// W32: encoding: [0x05,0x06,0x02,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_subrev_co_u32 v5, s6, ttmp15, src_scc
+// W32: encoding: [0x05,0x06,0x02,0xd7,0x7b,0xfa,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_subrev_co_u32 v5, s6, m0, 0.5
+// W32: encoding: [0x05,0x06,0x02,0xd7,0x7d,0xe0,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_subrev_co_u32 v5, s6, exec_lo, -1
+// W32: encoding: [0x05,0x06,0x02,0xd7,0x7e,0x82,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_subrev_co_u32 v5, s6, exec_hi, null
+// W32: encoding: [0x05,0x06,0x02,0xd7,0x7f,0xf8,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_subrev_co_u32 v5, s105, null, exec_lo
+// W32: encoding: [0x05,0x69,0x02,0xd7,0x7c,0xfc,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_subrev_co_u32 v5, vcc_lo, -1, exec_hi
+// W32: encoding: [0x05,0x6a,0x02,0xd7,0xc1,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_subrev_co_u32 v5, vcc_hi, 0.5, m0
+// W32: encoding: [0x05,0x6b,0x02,0xd7,0xf0,0xfa,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_subrev_co_u32 v5, ttmp15, src_scc, vcc_lo
+// W32: encoding: [0x05,0x7b,0x02,0xd7,0xfd,0xd4,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_subrev_co_u32 v5, s[12:13], v1, v2
+// W64: encoding: [0x05,0x0c,0x02,0xd7,0x01,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_subrev_co_u32 v5, s[12:13], v255, v255
+// W64: encoding: [0x05,0x0c,0x02,0xd7,0xff,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_subrev_co_u32 v5, s[12:13], s1, s2
+// W64: encoding: [0x05,0x0c,0x02,0xd7,0x01,0x04,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_subrev_co_u32 v5, s[12:13], s105, s105
+// W64: encoding: [0x05,0x0c,0x02,0xd7,0x69,0xd2,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_subrev_co_u32 v5, s[12:13], vcc_lo, ttmp15
+// W64: encoding: [0x05,0x0c,0x02,0xd7,0x6a,0xf6,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_subrev_co_u32 v5, s[12:13], vcc_hi, 0xaf123456
+// W64: encoding: [0x05,0x0c,0x02,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_subrev_co_u32 v5, s[12:13], ttmp15, src_scc
+// W64: encoding: [0x05,0x0c,0x02,0xd7,0x7b,0xfa,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_subrev_co_u32 v5, s[12:13], m0, 0.5
+// W64: encoding: [0x05,0x0c,0x02,0xd7,0x7d,0xe0,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_subrev_co_u32 v5, s[12:13], exec_lo, -1
+// W64: encoding: [0x05,0x0c,0x02,0xd7,0x7e,0x82,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_subrev_co_u32 v5, s[12:13], exec_hi, null
+// W64: encoding: [0x05,0x0c,0x02,0xd7,0x7f,0xf8,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_subrev_co_u32 v5, s[12:13], null, exec_lo
+// W64: encoding: [0x05,0x0c,0x02,0xd7,0x7c,0xfc,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_subrev_co_u32 v5, s[104:105], -1, exec_hi
+// W64: encoding: [0x05,0x68,0x02,0xd7,0xc1,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_subrev_co_u32 v5, vcc, 0.5, m0
+// W64: encoding: [0x05,0x6a,0x02,0xd7,0xf0,0xfa,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_subrev_co_u32 v5, ttmp[14:15], src_scc, vcc_lo
+// W64: encoding: [0x05,0x7a,0x02,0xd7,0xfd,0xd4,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_subrev_co_u32 v255, null, 0xaf123456, vcc_hi clamp
+// GFX12: encoding: [0xff,0xfc,0x02,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf]
+
+v_trig_preop_f64 v[5:6], v[1:2], v2
+// GFX12: encoding: [0x05,0x00,0x2f,0xd7,0x01,0x05,0x02,0x00]
+
+v_trig_preop_f64 v[5:6], v[1:2], v255
+// GFX12: encoding: [0x05,0x00,0x2f,0xd7,0x01,0xff,0x03,0x00]
+
+v_trig_preop_f64 v[5:6], v[1:2], s2
+// GFX12: encoding: [0x05,0x00,0x2f,0xd7,0x01,0x05,0x00,0x00]
+
+v_trig_preop_f64 v[5:6], v[1:2], s105
+// GFX12: encoding: [0x05,0x00,0x2f,0xd7,0x01,0xd3,0x00,0x00]
+
+v_trig_preop_f64 v[5:6], v[254:255], ttmp15
+// GFX12: encoding: [0x05,0x00,0x2f,0xd7,0xfe,0xf7,0x00,0x00]
+
+v_trig_preop_f64 v[5:6], s[2:3], vcc_hi
+// GFX12: encoding: [0x05,0x00,0x2f,0xd7,0x02,0xd6,0x00,0x00]
+
+v_trig_preop_f64 v[5:6], s[104:105], vcc_lo
+// GFX12: encoding: [0x05,0x00,0x2f,0xd7,0x68,0xd4,0x00,0x00]
+
+v_trig_preop_f64 v[5:6], vcc, m0
+// GFX12: encoding: [0x05,0x00,0x2f,0xd7,0x6a,0xfa,0x00,0x00]
+
+v_trig_preop_f64 v[5:6], ttmp[14:15], exec_hi
+// GFX12: encoding: [0x05,0x00,0x2f,0xd7,0x7a,0xfe,0x00,0x00]
+
+v_trig_preop_f64 v[5:6], exec, exec_lo
+// GFX12: encoding: [0x05,0x00,0x2f,0xd7,0x7e,0xfc,0x00,0x00]
+
+v_trig_preop_f64 v[5:6], null, null
+// GFX12: encoding: [0x05,0x00,0x2f,0xd7,0x7c,0xf8,0x00,0x00]
+
+v_trig_preop_f64 v[5:6], -1, -1
+// GFX12: encoding: [0x05,0x00,0x2f,0xd7,0xc1,0x82,0x01,0x00]
+
+v_trig_preop_f64 v[5:6], 0.5, 0.5 mul:2
+// GFX12: encoding: [0x05,0x00,0x2f,0xd7,0xf0,0xe0,0x01,0x08]
+
+v_trig_preop_f64 v[5:6], -|src_scc|, src_scc mul:4
+// GFX12: encoding: [0x05,0x01,0x2f,0xd7,0xfd,0xfa,0x01,0x30]
+
+v_trig_preop_f64 v[254:255], 0xaf123456, 0xaf123456 clamp div:2
+// GFX12: encoding: [0xfe,0x80,0x2f,0xd7,0xff,0xfe,0x01,0x18,0x56,0x34,0x12,0xaf]
+
+v_writelane_b32 v5, s1, s2
+// GFX12: encoding: [0x05,0x00,0x61,0xd7,0x01,0x04,0x00,0x00]
+
+v_writelane_b32 v5, s105, 
s2 +// GFX12: encoding: [0x05,0x00,0x61,0xd7,0x69,0x04,0x00,0x00] + +v_writelane_b32 v5, vcc_lo, s2 +// GFX12: encoding: [0x05,0x00,0x61,0xd7,0x6a,0x04,0x00,0x00] + +v_writelane_b32 v5, vcc_hi, s2 +// GFX12: encoding: [0x05,0x00,0x61,0xd7,0x6b,0x04,0x00,0x00] + +v_writelane_b32 v5, ttmp15, s2 +// GFX12: encoding: [0x05,0x00,0x61,0xd7,0x7b,0x04,0x00,0x00] + +v_writelane_b32 v5, m0, s2 +// GFX12: encoding: [0x05,0x00,0x61,0xd7,0x7d,0x04,0x00,0x00] + +v_writelane_b32 v5, exec_lo, s2 +// GFX12: encoding: [0x05,0x00,0x61,0xd7,0x7e,0x04,0x00,0x00] + +v_writelane_b32 v5, exec_hi, s105 +// GFX12: encoding: [0x05,0x00,0x61,0xd7,0x7f,0xd2,0x00,0x00] + +v_writelane_b32 v5, null, ttmp15 +// GFX12: encoding: [0x05,0x00,0x61,0xd7,0x7c,0xf6,0x00,0x00] + +v_writelane_b32 v5, -1, null +// GFX12: encoding: [0x05,0x00,0x61,0xd7,0xc1,0xf8,0x00,0x00] + +v_writelane_b32 v5, 0.5, m0 +// GFX12: encoding: [0x05,0x00,0x61,0xd7,0xf0,0xfa,0x00,0x00] + +v_writelane_b32 v5, src_scc, vcc_lo +// GFX12: encoding: [0x05,0x00,0x61,0xd7,0xfd,0xd4,0x00,0x00] + +v_writelane_b32 v255, 0xaf123456, vcc_hi +// GFX12: encoding: [0xff,0x00,0x61,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] + +v_xad_u32 v5, v1, v2, s3 +// GFX12: encoding: [0x05,0x00,0x45,0xd6,0x01,0x05,0x0e,0x00] + +v_xad_u32 v5, v255, s2, s105 +// GFX12: encoding: [0x05,0x00,0x45,0xd6,0xff,0x05,0xa4,0x01] + +v_xad_u32 v5, s1, v255, exec_hi +// GFX12: encoding: [0x05,0x00,0x45,0xd6,0x01,0xfe,0xff,0x01] + +v_xad_u32 v5, s105, s105, exec_lo +// GFX12: encoding: [0x05,0x00,0x45,0xd6,0x69,0xd2,0xf8,0x01] + +v_xad_u32 v5, vcc_lo, ttmp15, v3 +// GFX12: encoding: [0x05,0x00,0x45,0xd6,0x6a,0xf6,0x0c,0x04] + +v_xad_u32 v5, vcc_hi, 0xaf123456, v255 +// GFX12: encoding: [0x05,0x00,0x45,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] + +v_xad_u32 v5, ttmp15, src_scc, ttmp15 +// GFX12: encoding: [0x05,0x00,0x45,0xd6,0x7b,0xfa,0xed,0x01] + +v_xad_u32 v5, m0, 0.5, m0 +// GFX12: encoding: [0x05,0x00,0x45,0xd6,0x7d,0xe0,0xf5,0x01] + +v_xad_u32 v5, exec_lo, -1, vcc_hi +// GFX12: encoding: [0x05,0x00,0x45,0xd6,0x7e,0x82,0xad,0x01] + +v_xad_u32 v5, exec_hi, null, vcc_lo +// GFX12: encoding: [0x05,0x00,0x45,0xd6,0x7f,0xf8,0xa8,0x01] + +v_xad_u32 v5, null, exec_lo, 0xaf123456 +// GFX12: encoding: [0x05,0x00,0x45,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf] + +v_xad_u32 v5, -1, exec_hi, src_scc +// GFX12: encoding: [0x05,0x00,0x45,0xd6,0xc1,0xfe,0xf4,0x03] + +v_xad_u32 v5, 0.5, m0, 0.5 +// GFX12: encoding: [0x05,0x00,0x45,0xd6,0xf0,0xfa,0xc0,0x03] + +v_xad_u32 v5, src_scc, vcc_lo, -1 +// GFX12: encoding: [0x05,0x00,0x45,0xd6,0xfd,0xd4,0x04,0x03] + +v_xad_u32 v255, 0xaf123456, vcc_hi, null +// GFX12: encoding: [0xff,0x00,0x45,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] + +v_xor3_b32 v5, v1, v2, s3 +// GFX12: encoding: [0x05,0x00,0x40,0xd6,0x01,0x05,0x0e,0x00] + +v_xor3_b32 v5, v255, s2, s105 +// GFX12: encoding: [0x05,0x00,0x40,0xd6,0xff,0x05,0xa4,0x01] + +v_xor3_b32 v5, s1, v255, exec_hi +// GFX12: encoding: [0x05,0x00,0x40,0xd6,0x01,0xfe,0xff,0x01] + +v_xor3_b32 v5, s105, s105, exec_lo +// GFX12: encoding: [0x05,0x00,0x40,0xd6,0x69,0xd2,0xf8,0x01] + +v_xor3_b32 v5, vcc_lo, ttmp15, v3 +// GFX12: encoding: [0x05,0x00,0x40,0xd6,0x6a,0xf6,0x0c,0x04] + +v_xor3_b32 v5, vcc_hi, 0xaf123456, v255 +// GFX12: encoding: [0x05,0x00,0x40,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] + +v_xor3_b32 v5, ttmp15, src_scc, ttmp15 +// GFX12: encoding: [0x05,0x00,0x40,0xd6,0x7b,0xfa,0xed,0x01] + +v_xor3_b32 v5, m0, 0.5, m0 +// GFX12: encoding: [0x05,0x00,0x40,0xd6,0x7d,0xe0,0xf5,0x01] + +v_xor3_b32 v5, 
exec_lo, -1, vcc_hi +// GFX12: encoding: [0x05,0x00,0x40,0xd6,0x7e,0x82,0xad,0x01] + +v_xor3_b32 v5, exec_hi, null, vcc_lo +// GFX12: encoding: [0x05,0x00,0x40,0xd6,0x7f,0xf8,0xa8,0x01] + +v_xor3_b32 v5, null, exec_lo, 0xaf123456 +// GFX12: encoding: [0x05,0x00,0x40,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf] + +v_xor3_b32 v5, -1, exec_hi, src_scc +// GFX12: encoding: [0x05,0x00,0x40,0xd6,0xc1,0xfe,0xf4,0x03] + +v_xor3_b32 v5, 0.5, m0, 0.5 +// GFX12: encoding: [0x05,0x00,0x40,0xd6,0xf0,0xfa,0xc0,0x03] + +v_xor3_b32 v5, src_scc, vcc_lo, -1 +// GFX12: encoding: [0x05,0x00,0x40,0xd6,0xfd,0xd4,0x04,0x03] + +v_xor3_b32 v255, 0xaf123456, vcc_hi, null +// GFX12: encoding: [0xff,0x00,0x40,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] + +v_xor_b16 v5, v1, v2 +// GFX12: encoding: [0x05,0x00,0x64,0xd7,0x01,0x05,0x02,0x00] + +v_xor_b16 v5, v255, v255 +// GFX12: encoding: [0x05,0x00,0x64,0xd7,0xff,0xff,0x03,0x00] + +v_xor_b16 v5, s1, s2 +// GFX12: encoding: [0x05,0x00,0x64,0xd7,0x01,0x04,0x00,0x00] + +v_xor_b16 v5, s105, s105 +// GFX12: encoding: [0x05,0x00,0x64,0xd7,0x69,0xd2,0x00,0x00] + +v_xor_b16 v5, vcc_lo, ttmp15 +// GFX12: encoding: [0x05,0x00,0x64,0xd7,0x6a,0xf6,0x00,0x00] + +v_xor_b16 v5, vcc_hi, 0xfe0b +// GFX12: encoding: [0x05,0x00,0x64,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] + +v_xor_b16 v5, ttmp15, src_scc +// GFX12: encoding: [0x05,0x00,0x64,0xd7,0x7b,0xfa,0x01,0x00] + +v_xor_b16 v5, m0, 0.5 +// GFX12: encoding: [0x05,0x00,0x64,0xd7,0x7d,0xe0,0x01,0x00] + +v_xor_b16 v5, exec_lo, -1 +// GFX12: encoding: [0x05,0x00,0x64,0xd7,0x7e,0x82,0x01,0x00] + +v_xor_b16 v5, exec_hi, null +// GFX12: encoding: [0x05,0x00,0x64,0xd7,0x7f,0xf8,0x00,0x00] + +v_xor_b16 v5, null, exec_lo +// GFX12: encoding: [0x05,0x00,0x64,0xd7,0x7c,0xfc,0x00,0x00] + +v_xor_b16 v5, -1, exec_hi +// GFX12: encoding: [0x05,0x00,0x64,0xd7,0xc1,0xfe,0x00,0x00] + +v_xor_b16 v5, 0.5, m0 +// GFX12: encoding: [0x05,0x00,0x64,0xd7,0xf0,0xfa,0x00,0x00] + +v_xor_b16 v5, src_scc, vcc_lo +// GFX12: encoding: [0x05,0x00,0x64,0xd7,0xfd,0xd4,0x00,0x00] + +v_xor_b16 v255, 0xfe0b, vcc_hi +// GFX12: encoding: [0xff,0x00,0x64,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] + +v_minimum_f32 v5, v1, v2 +// GFX12: encoding: [0x05,0x00,0x65,0xd7,0x01,0x05,0x02,0x00] + +v_minimum_f32 v5, v255, v255 +// GFX12: encoding: [0x05,0x00,0x65,0xd7,0xff,0xff,0x03,0x00] + +v_minimum_f32 v5, s1, s2 +// GFX12: encoding: [0x05,0x00,0x65,0xd7,0x01,0x04,0x00,0x00] + +v_minimum_f32 v5, s105, s105 +// GFX12: encoding: [0x05,0x00,0x65,0xd7,0x69,0xd2,0x00,0x00] + +v_minimum_f32 v5, vcc_lo, ttmp15 +// GFX12: encoding: [0x05,0x00,0x65,0xd7,0x6a,0xf6,0x00,0x00] + +v_minimum_f32 v5, vcc_hi, 0xaf123456 +// GFX12: encoding: [0x05,0x00,0x65,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] + +v_minimum_f32 v5, ttmp15, src_scc +// GFX12: encoding: [0x05,0x00,0x65,0xd7,0x7b,0xfa,0x01,0x00] + +v_minimum_f32 v5, m0, 0.5 +// GFX12: encoding: [0x05,0x00,0x65,0xd7,0x7d,0xe0,0x01,0x00] + +v_minimum_f32 v5, exec_lo, -1 +// GFX12: encoding: [0x05,0x00,0x65,0xd7,0x7e,0x82,0x01,0x00] + +v_minimum_f32 v5, |exec_hi|, null +// GFX12: encoding: [0x05,0x01,0x65,0xd7,0x7f,0xf8,0x00,0x00] + +v_minimum_f32 v5, null, exec_lo +// GFX12: encoding: [0x05,0x00,0x65,0xd7,0x7c,0xfc,0x00,0x00] + +v_minimum_f32 v5, -1, exec_hi +// GFX12: encoding: [0x05,0x00,0x65,0xd7,0xc1,0xfe,0x00,0x00] + +v_minimum_f32 v5, 0.5, -m0 +// GFX12: encoding: [0x05,0x00,0x65,0xd7,0xf0,0xfa,0x00,0x40] + +v_minimum_f32 v5, -src_scc, |vcc_lo| +// GFX12: encoding: [0x05,0x02,0x65,0xd7,0xfd,0xd4,0x00,0x20] + +v_minimum_f32 
v255, -|0xaf123456|, -|vcc_hi| +// GFX12: encoding: [0xff,0x03,0x65,0xd7,0xff,0xd6,0x00,0x60,0x56,0x34,0x12,0xaf] + +v_maximum_f32 v5, v1, v2 +// GFX12: encoding: [0x05,0x00,0x66,0xd7,0x01,0x05,0x02,0x00] + +v_maximum_f32 v5, v255, v255 +// GFX12: encoding: [0x05,0x00,0x66,0xd7,0xff,0xff,0x03,0x00] + +v_maximum_f32 v5, s1, s2 +// GFX12: encoding: [0x05,0x00,0x66,0xd7,0x01,0x04,0x00,0x00] + +v_maximum_f32 v5, s105, s105 +// GFX12: encoding: [0x05,0x00,0x66,0xd7,0x69,0xd2,0x00,0x00] + +v_maximum_f32 v5, vcc_lo, ttmp15 +// GFX12: encoding: [0x05,0x00,0x66,0xd7,0x6a,0xf6,0x00,0x00] + +v_maximum_f32 v5, vcc_hi, 0xaf123456 +// GFX12: encoding: [0x05,0x00,0x66,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] + +v_maximum_f32 v5, ttmp15, src_scc +// GFX12: encoding: [0x05,0x00,0x66,0xd7,0x7b,0xfa,0x01,0x00] + +v_maximum_f32 v5, m0, 0.5 +// GFX12: encoding: [0x05,0x00,0x66,0xd7,0x7d,0xe0,0x01,0x00] + +v_maximum_f32 v5, exec_lo, -1 +// GFX12: encoding: [0x05,0x00,0x66,0xd7,0x7e,0x82,0x01,0x00] + +v_maximum_f32 v5, |exec_hi|, null +// GFX12: encoding: [0x05,0x01,0x66,0xd7,0x7f,0xf8,0x00,0x00] + +v_maximum_f32 v5, null, exec_lo +// GFX12: encoding: [0x05,0x00,0x66,0xd7,0x7c,0xfc,0x00,0x00] + +v_maximum_f32 v5, -1, exec_hi +// GFX12: encoding: [0x05,0x00,0x66,0xd7,0xc1,0xfe,0x00,0x00] + +v_maximum_f32 v5, 0.5, -m0 +// GFX12: encoding: [0x05,0x00,0x66,0xd7,0xf0,0xfa,0x00,0x40] + +v_maximum_f32 v5, -src_scc, |vcc_lo| +// GFX12: encoding: [0x05,0x02,0x66,0xd7,0xfd,0xd4,0x00,0x20] + +v_maximum_f32 v255, -|0xaf123456|, -|vcc_hi| +// GFX12: encoding: [0xff,0x03,0x66,0xd7,0xff,0xd6,0x00,0x60,0x56,0x34,0x12,0xaf] + +v_minimum_f16 v5, v1, v2 +// GFX12: encoding: [0x05,0x00,0x67,0xd7,0x01,0x05,0x02,0x00] + +v_minimum_f16 v5, v255, v255 +// GFX12: encoding: [0x05,0x00,0x67,0xd7,0xff,0xff,0x03,0x00] + +v_minimum_f16 v5, s1, s2 +// GFX12: encoding: [0x05,0x00,0x67,0xd7,0x01,0x04,0x00,0x00] + +v_minimum_f16 v5, s105, s105 +// GFX12: encoding: [0x05,0x00,0x67,0xd7,0x69,0xd2,0x00,0x00] + +v_minimum_f16 v5, vcc_lo, ttmp15 +// GFX12: encoding: [0x05,0x00,0x67,0xd7,0x6a,0xf6,0x00,0x00] + +v_minimum_f16 v5, vcc_hi, 0xaf12 +// GFX12: encoding: [0x05,0x00,0x67,0xd7,0x6b,0xfe,0x01,0x00,0x12,0xaf,0x00,0x00] + +v_minimum_f16 v5, ttmp15, src_scc +// GFX12: encoding: [0x05,0x00,0x67,0xd7,0x7b,0xfa,0x01,0x00] + +v_minimum_f16 v5, m0, 0.5 +// GFX12: encoding: [0x05,0x00,0x67,0xd7,0x7d,0xe0,0x01,0x00] + +v_minimum_f16 v5, exec_lo, -1 +// GFX12: encoding: [0x05,0x00,0x67,0xd7,0x7e,0x82,0x01,0x00] + +v_minimum_f16 v5, |exec_hi|, null +// GFX12: encoding: [0x05,0x01,0x67,0xd7,0x7f,0xf8,0x00,0x00] + +v_minimum_f16 v5, null, exec_lo +// GFX12: encoding: [0x05,0x00,0x67,0xd7,0x7c,0xfc,0x00,0x00] + +v_minimum_f16 v5, -1, exec_hi +// GFX12: encoding: [0x05,0x00,0x67,0xd7,0xc1,0xfe,0x00,0x00] + +v_minimum_f16 v5, 0.5, -m0 +// GFX12: encoding: [0x05,0x00,0x67,0xd7,0xf0,0xfa,0x00,0x40] + +v_minimum_f16 v5, -src_scc, |vcc_lo| +// GFX12: encoding: [0x05,0x02,0x67,0xd7,0xfd,0xd4,0x00,0x20] + +v_minimum_f16 v255, -|0xaf12|, -|vcc_hi| +// GFX12: encoding: [0xff,0x03,0x67,0xd7,0xff,0xd6,0x00,0x60,0x12,0xaf,0x00,0x00] + +v_minimum_f16 v205, v201, v200 +// GFX12: encoding: [0xcd,0x00,0x67,0xd7,0xc9,0x91,0x03,0x00] + +v_maximum_f16 v5, v1, v2 +// GFX12: encoding: [0x05,0x00,0x68,0xd7,0x01,0x05,0x02,0x00] + +v_maximum_f16 v5, v255, v255 +// GFX12: encoding: [0x05,0x00,0x68,0xd7,0xff,0xff,0x03,0x00] + +v_maximum_f16 v5, s1, s2 +// GFX12: encoding: [0x05,0x00,0x68,0xd7,0x01,0x04,0x00,0x00] + +v_maximum_f16 v5, s105, s105 +// GFX12: encoding: 
[0x05,0x00,0x68,0xd7,0x69,0xd2,0x00,0x00] + +v_maximum_f16 v5, vcc_lo, ttmp15 +// GFX12: encoding: [0x05,0x00,0x68,0xd7,0x6a,0xf6,0x00,0x00] + +v_maximum_f16 v5, vcc_hi, 0xaf12 +// GFX12: encoding: [0x05,0x00,0x68,0xd7,0x6b,0xfe,0x01,0x00,0x12,0xaf,0x00,0x00] + +v_maximum_f16 v5, ttmp15, src_scc +// GFX12: encoding: [0x05,0x00,0x68,0xd7,0x7b,0xfa,0x01,0x00] + +v_maximum_f16 v5, m0, 0.5 +// GFX12: encoding: [0x05,0x00,0x68,0xd7,0x7d,0xe0,0x01,0x00] + +v_maximum_f16 v5, exec_lo, -1 +// GFX12: encoding: [0x05,0x00,0x68,0xd7,0x7e,0x82,0x01,0x00] + +v_maximum_f16 v5, |exec_hi|, null +// GFX12: encoding: [0x05,0x01,0x68,0xd7,0x7f,0xf8,0x00,0x00] + +v_maximum_f16 v5, null, exec_lo +// GFX12: encoding: [0x05,0x00,0x68,0xd7,0x7c,0xfc,0x00,0x00] + +v_maximum_f16 v5, -1, exec_hi +// GFX12: encoding: [0x05,0x00,0x68,0xd7,0xc1,0xfe,0x00,0x00] + +v_maximum_f16 v5, 0.5, -m0 +// GFX12: encoding: [0x05,0x00,0x68,0xd7,0xf0,0xfa,0x00,0x40] + +v_maximum_f16 v5, -src_scc, |vcc_lo| +// GFX12: encoding: [0x05,0x02,0x68,0xd7,0xfd,0xd4,0x00,0x20] + +v_maximum_f16 v255, -|0xaf12|, -|vcc_hi| +// GFX12: encoding: [0xff,0x03,0x68,0xd7,0xff,0xd6,0x00,0x60,0x12,0xaf,0x00,0x00] + +v_maximum_f16 v205, v201, v200 +// GFX12: encoding: [0xcd,0x00,0x68,0xd7,0xc9,0x91,0x03,0x00] + +v_minimum_f64 v[5:6], v[1:2], v[3:4] +// GFX12: encoding: [0x05,0x00,0x41,0xd7,0x01,0x07,0x02,0x00] + +v_minimum_f64 v[5:6], v[254:255], v[254:255] +// GFX12: encoding: [0x05,0x00,0x41,0xd7,0xfe,0xfd,0x03,0x00] + +v_minimum_f64 v[5:6], s[6:7], s[4:5] +// GFX12: encoding: [0x05,0x00,0x41,0xd7,0x06,0x08,0x00,0x00] + +v_minimum_f64 v[5:6], s[104:105], s[104:105] +// GFX12: encoding: [0x05,0x00,0x41,0xd7,0x68,0xd0,0x00,0x00] + +v_minimum_f64 v[5:6], vcc, ttmp[14:15] +// GFX12: encoding: [0x05,0x00,0x41,0xd7,0x6a,0xf4,0x00,0x00] + +v_minimum_f64 v[5:6], vcc, 0xaf121234 +// GFX12: encoding: [0x05,0x00,0x41,0xd7,0x6a,0xfe,0x01,0x00,0x34,0x12,0x12,0xaf] + +v_minimum_f64 v[5:6], ttmp[14:15], src_scc +// GFX12: encoding: [0x05,0x00,0x41,0xd7,0x7a,0xfa,0x01,0x00] + +v_minimum_f64 v[5:6], vcc, 0.5 +// GFX12: encoding: [0x05,0x00,0x41,0xd7,0x6a,0xe0,0x01,0x00] + +v_minimum_f64 v[5:6], exec, -1 +// GFX12: encoding: [0x05,0x00,0x41,0xd7,0x7e,0x82,0x01,0x00] + +v_minimum_f64 v[5:6], |exec|, null +// GFX12: encoding: [0x05,0x01,0x41,0xd7,0x7e,0xf8,0x00,0x00] + +v_minimum_f64 v[5:6], null, exec +// GFX12: encoding: [0x05,0x00,0x41,0xd7,0x7c,0xfc,0x00,0x00] + +v_minimum_f64 v[5:6], -1, exec +// GFX12: encoding: [0x05,0x00,0x41,0xd7,0xc1,0xfc,0x00,0x00] + +v_minimum_f64 v[5:6], 0.5, -vcc +// GFX12: encoding: [0x05,0x00,0x41,0xd7,0xf0,0xd4,0x00,0x40] + +v_minimum_f64 v[5:6], -src_scc, |vcc| +// GFX12: encoding: [0x05,0x02,0x41,0xd7,0xfd,0xd4,0x00,0x20] + +v_minimum_f64 v[254:255], -|2|, -|vcc| +// GFX12: encoding: [0xfe,0x03,0x41,0xd7,0x82,0xd4,0x00,0x60] + +v_maximum_f64 v[5:6], v[1:2], v[3:4] +// GFX12: encoding: [0x05,0x00,0x42,0xd7,0x01,0x07,0x02,0x00] + +v_maximum_f64 v[5:6], v[254:255], v[254:255] +// GFX12: encoding: [0x05,0x00,0x42,0xd7,0xfe,0xfd,0x03,0x00] + +v_maximum_f64 v[5:6], s[6:7], s[4:5] +// GFX12: encoding: [0x05,0x00,0x42,0xd7,0x06,0x08,0x00,0x00] + +v_maximum_f64 v[5:6], s[104:105], s[104:105] +// GFX12: encoding: [0x05,0x00,0x42,0xd7,0x68,0xd0,0x00,0x00] + +v_maximum_f64 v[5:6], vcc, ttmp[14:15] +// GFX12: encoding: [0x05,0x00,0x42,0xd7,0x6a,0xf4,0x00,0x00] + +v_maximum_f64 v[5:6], vcc, 0xaf121234 +// GFX12: encoding: [0x05,0x00,0x42,0xd7,0x6a,0xfe,0x01,0x00,0x34,0x12,0x12,0xaf] + +v_maximum_f64 v[5:6], ttmp[14:15], src_scc +// GFX12: encoding: 
[0x05,0x00,0x42,0xd7,0x7a,0xfa,0x01,0x00] + +v_maximum_f64 v[5:6], vcc, 0.5 +// GFX12: encoding: [0x05,0x00,0x42,0xd7,0x6a,0xe0,0x01,0x00] + +v_maximum_f64 v[5:6], exec, -1 +// GFX12: encoding: [0x05,0x00,0x42,0xd7,0x7e,0x82,0x01,0x00] + +v_maximum_f64 v[5:6], |exec|, null +// GFX12: encoding: [0x05,0x01,0x42,0xd7,0x7e,0xf8,0x00,0x00] + +v_maximum_f64 v[5:6], null, exec +// GFX12: encoding: [0x05,0x00,0x42,0xd7,0x7c,0xfc,0x00,0x00] + +v_maximum_f64 v[5:6], -1, exec +// GFX12: encoding: [0x05,0x00,0x42,0xd7,0xc1,0xfc,0x00,0x00] + +v_maximum_f64 v[5:6], 0.5, -vcc +// GFX12: encoding: [0x05,0x00,0x42,0xd7,0xf0,0xd4,0x00,0x40] + +v_maximum_f64 v[5:6], -src_scc, |vcc| +// GFX12: encoding: [0x05,0x02,0x42,0xd7,0xfd,0xd4,0x00,0x20] + +v_maximum_f64 v[254:255], -|2|, -|vcc| +// GFX12: encoding: [0xfe,0x03,0x42,0xd7,0x82,0xd4,0x00,0x60] + +v_minimum3_f32 v5, v1, v2, s3 +// GFX12: encoding: [0x05,0x00,0x2d,0xd6,0x01,0x05,0x0e,0x00] + +v_minimum3_f32 v5, v255, s2, s105 +// GFX12: encoding: [0x05,0x00,0x2d,0xd6,0xff,0x05,0xa4,0x01] + +v_minimum3_f32 v5, s1, v255, exec_hi +// GFX12: encoding: [0x05,0x00,0x2d,0xd6,0x01,0xfe,0xff,0x01] + +v_minimum3_f32 v5, s105, s105, exec_lo +// GFX12: encoding: [0x05,0x00,0x2d,0xd6,0x69,0xd2,0xf8,0x01] + +v_minimum3_f32 v5, vcc_lo, ttmp15, v3 +// GFX12: encoding: [0x05,0x00,0x2d,0xd6,0x6a,0xf6,0x0c,0x04] + +v_minimum3_f32 v5, vcc_hi, 0xaf123456, v255 +// GFX12: encoding: [0x05,0x00,0x2d,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] + +v_minimum3_f32 v5, -|ttmp15|, -|src_scc|, -|ttmp15| +// GFX12: encoding: [0x05,0x07,0x2d,0xd6,0x7b,0xfa,0xed,0xe1] + +v_minimum3_f32 v5, m0, 0.5, m0 +// GFX12: encoding: [0x05,0x00,0x2d,0xd6,0x7d,0xe0,0xf5,0x01] + +v_minimum3_f32 v5, |exec_lo|, -1, vcc_hi +// GFX12: encoding: [0x05,0x01,0x2d,0xd6,0x7e,0x82,0xad,0x01] + +v_minimum3_f32 v5, -|exec_hi|, null, -|vcc_lo| +// GFX12: encoding: [0x05,0x05,0x2d,0xd6,0x7f,0xf8,0xa8,0xa1] + +v_minimum3_f32 v5, null, exec_lo, -|0xaf123456| +// GFX12: encoding: [0x05,0x04,0x2d,0xd6,0x7c,0xfc,0xfc,0x83,0x56,0x34,0x12,0xaf] + +v_minimum3_f32 v5, -1, -|exec_hi|, -|src_scc| +// GFX12: encoding: [0x05,0x06,0x2d,0xd6,0xc1,0xfe,0xf4,0xc3] + +v_minimum3_f32 v5, 0.5, -m0, 0.5 mul:2 +// GFX12: encoding: [0x05,0x00,0x2d,0xd6,0xf0,0xfa,0xc0,0x4b] + +v_minimum3_f32 v5, -src_scc, |vcc_lo|, -1 mul:4 +// GFX12: encoding: [0x05,0x02,0x2d,0xd6,0xfd,0xd4,0x04,0x33] + +v_minimum3_f32 v255, -|0xaf123456|, -|vcc_hi|, null clamp div:2 +// GFX12: encoding: [0xff,0x83,0x2d,0xd6,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf] + +v_maximum3_f32 v5, v1, v2, s3 +// GFX12: encoding: [0x05,0x00,0x2e,0xd6,0x01,0x05,0x0e,0x00] + +v_maximum3_f32 v5, v255, s2, s105 +// GFX12: encoding: [0x05,0x00,0x2e,0xd6,0xff,0x05,0xa4,0x01] + +v_maximum3_f32 v5, s1, v255, exec_hi +// GFX12: encoding: [0x05,0x00,0x2e,0xd6,0x01,0xfe,0xff,0x01] + +v_maximum3_f32 v5, s105, s105, exec_lo +// GFX12: encoding: [0x05,0x00,0x2e,0xd6,0x69,0xd2,0xf8,0x01] + +v_maximum3_f32 v5, vcc_lo, ttmp15, v3 +// GFX12: encoding: [0x05,0x00,0x2e,0xd6,0x6a,0xf6,0x0c,0x04] + +v_maximum3_f32 v5, vcc_hi, 0xaf123456, v255 +// GFX12: encoding: [0x05,0x00,0x2e,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] + +v_maximum3_f32 v5, -|ttmp15|, -|src_scc|, -|ttmp15| +// GFX12: encoding: [0x05,0x07,0x2e,0xd6,0x7b,0xfa,0xed,0xe1] + +v_maximum3_f32 v5, m0, 0.5, m0 +// GFX12: encoding: [0x05,0x00,0x2e,0xd6,0x7d,0xe0,0xf5,0x01] + +v_maximum3_f32 v5, |exec_lo|, -1, vcc_hi +// GFX12: encoding: [0x05,0x01,0x2e,0xd6,0x7e,0x82,0xad,0x01] + +v_maximum3_f32 v5, -|exec_hi|, null, -|vcc_lo| +// GFX12: 
encoding: [0x05,0x05,0x2e,0xd6,0x7f,0xf8,0xa8,0xa1] + +v_maximum3_f32 v5, null, exec_lo, -|0xaf123456| +// GFX12: encoding: [0x05,0x04,0x2e,0xd6,0x7c,0xfc,0xfc,0x83,0x56,0x34,0x12,0xaf] + +v_maximum3_f32 v5, -1, -|exec_hi|, -|src_scc| +// GFX12: encoding: [0x05,0x06,0x2e,0xd6,0xc1,0xfe,0xf4,0xc3] + +v_maximum3_f32 v5, 0.5, -m0, 0.5 mul:2 +// GFX12: encoding: [0x05,0x00,0x2e,0xd6,0xf0,0xfa,0xc0,0x4b] + +v_maximum3_f32 v5, -src_scc, |vcc_lo|, -1 mul:4 +// GFX12: encoding: [0x05,0x02,0x2e,0xd6,0xfd,0xd4,0x04,0x33] + +v_maximum3_f32 v255, -|0xaf123456|, -|vcc_hi|, null clamp div:2 +// GFX12: encoding: [0xff,0x83,0x2e,0xd6,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf] + +v_minimum3_f16 v5, v1, v2, s3 +// GFX12: encoding: [0x05,0x00,0x2f,0xd6,0x01,0x05,0x0e,0x00] + +v_minimum3_f16 v5, v255, s2, s105 +// GFX12: encoding: [0x05,0x00,0x2f,0xd6,0xff,0x05,0xa4,0x01] + +v_minimum3_f16 v5, s1, v255, exec_hi +// GFX12: encoding: [0x05,0x00,0x2f,0xd6,0x01,0xfe,0xff,0x01] + +v_minimum3_f16 v5, s105, s105, exec_lo +// GFX12: encoding: [0x05,0x00,0x2f,0xd6,0x69,0xd2,0xf8,0x01] + +v_minimum3_f16 v5, vcc_lo, ttmp15, v3 +// GFX12: encoding: [0x05,0x00,0x2f,0xd6,0x6a,0xf6,0x0c,0x04] + +v_minimum3_f16 v5, vcc_hi, 0xaf12, v255 +// GFX12: encoding: [0x05,0x00,0x2f,0xd6,0x6b,0xfe,0xfd,0x07,0x12,0xaf,0x00,0x00] + +v_minimum3_f16 v5, -|ttmp15|, -|src_scc|, -|ttmp15| +// GFX12: encoding: [0x05,0x07,0x2f,0xd6,0x7b,0xfa,0xed,0xe1] + +v_minimum3_f16 v5, m0, 0.5, m0 +// GFX12: encoding: [0x05,0x00,0x2f,0xd6,0x7d,0xe0,0xf5,0x01] + +v_minimum3_f16 v5, |exec_lo|, -1, vcc_hi +// GFX12: encoding: [0x05,0x01,0x2f,0xd6,0x7e,0x82,0xad,0x01] + +v_minimum3_f16 v5, -|exec_hi|, null, -|vcc_lo| +// GFX12: encoding: [0x05,0x05,0x2f,0xd6,0x7f,0xf8,0xa8,0xa1] + +v_minimum3_f16 v5, null, exec_lo, -|0xaf12| +// GFX12: encoding: [0x05,0x04,0x2f,0xd6,0x7c,0xfc,0xfc,0x83,0x12,0xaf,0x00,0x00] + +v_minimum3_f16 v5, -1, -|exec_hi|, -|src_scc| +// GFX12: encoding: [0x05,0x06,0x2f,0xd6,0xc1,0xfe,0xf4,0xc3] + +v_minimum3_f16 v5, 0.5, -m0, 0.5 +// GFX12: encoding: [0x05,0x00,0x2f,0xd6,0xf0,0xfa,0xc0,0x43] + +v_minimum3_f16 v5, -src_scc, |vcc_lo|, -1 +// GFX12: encoding: [0x05,0x02,0x2f,0xd6,0xfd,0xd4,0x04,0x23] + +v_minimum3_f16 v255, -|0xaf12|, -|vcc_hi|, null clamp +// GFX12: encoding: [0xff,0x83,0x2f,0xd6,0xff,0xd6,0xf0,0x61,0x12,0xaf,0x00,0x00] + +v_maximum3_f16 v5, v1, v2, s3 +// GFX12: encoding: [0x05,0x00,0x30,0xd6,0x01,0x05,0x0e,0x00] + +v_maximum3_f16 v5, v255, s2, s105 +// GFX12: encoding: [0x05,0x00,0x30,0xd6,0xff,0x05,0xa4,0x01] + +v_maximum3_f16 v5, s1, v255, exec_hi +// GFX12: encoding: [0x05,0x00,0x30,0xd6,0x01,0xfe,0xff,0x01] + +v_maximum3_f16 v5, s105, s105, exec_lo +// GFX12: encoding: [0x05,0x00,0x30,0xd6,0x69,0xd2,0xf8,0x01] + +v_maximum3_f16 v5, vcc_lo, ttmp15, v3 +// GFX12: encoding: [0x05,0x00,0x30,0xd6,0x6a,0xf6,0x0c,0x04] + +v_maximum3_f16 v5, vcc_hi, 0xaf12, v255 +// GFX12: encoding: [0x05,0x00,0x30,0xd6,0x6b,0xfe,0xfd,0x07,0x12,0xaf,0x00,0x00] + +v_maximum3_f16 v5, -|ttmp15|, -|src_scc|, -|ttmp15| +// GFX12: encoding: [0x05,0x07,0x30,0xd6,0x7b,0xfa,0xed,0xe1] + +v_maximum3_f16 v5, m0, 0.5, m0 +// GFX12: encoding: [0x05,0x00,0x30,0xd6,0x7d,0xe0,0xf5,0x01] + +v_maximum3_f16 v5, |exec_lo|, -1, vcc_hi +// GFX12: encoding: [0x05,0x01,0x30,0xd6,0x7e,0x82,0xad,0x01] + +v_maximum3_f16 v5, -|exec_hi|, null, -|vcc_lo| +// GFX12: encoding: [0x05,0x05,0x30,0xd6,0x7f,0xf8,0xa8,0xa1] + +v_maximum3_f16 v5, null, exec_lo, -|0xaf12| +// GFX12: encoding: [0x05,0x04,0x30,0xd6,0x7c,0xfc,0xfc,0x83,0x12,0xaf,0x00,0x00] + +v_maximum3_f16 v5, -1, 
-|exec_hi|, -|src_scc| +// GFX12: encoding: [0x05,0x06,0x30,0xd6,0xc1,0xfe,0xf4,0xc3] + +v_maximum3_f16 v5, 0.5, -m0, 0.5 +// GFX12: encoding: [0x05,0x00,0x30,0xd6,0xf0,0xfa,0xc0,0x43] + +v_maximum3_f16 v5, -src_scc, |vcc_lo|, -1 +// GFX12: encoding: [0x05,0x02,0x30,0xd6,0xfd,0xd4,0x04,0x23] + +v_maximumminimum_f32 v5, v1, v2, s3 +// GFX12: encoding: [0x05,0x00,0x6d,0xd6,0x01,0x05,0x0e,0x00] + +v_maximumminimum_f32 v5, v255, s2, s105 +// GFX12: encoding: [0x05,0x00,0x6d,0xd6,0xff,0x05,0xa4,0x01] + +v_maximumminimum_f32 v5, s1, v255, exec_hi +// GFX12: encoding: [0x05,0x00,0x6d,0xd6,0x01,0xfe,0xff,0x01] + +v_maximumminimum_f32 v5, s105, s105, exec_lo +// GFX12: encoding: [0x05,0x00,0x6d,0xd6,0x69,0xd2,0xf8,0x01] + +v_maximumminimum_f32 v5, vcc_lo, ttmp15, v3 +// GFX12: encoding: [0x05,0x00,0x6d,0xd6,0x6a,0xf6,0x0c,0x04] + +v_maximumminimum_f32 v5, vcc_hi, 0xaf123456, v255 +// GFX12: encoding: [0x05,0x00,0x6d,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] + +v_maximumminimum_f32 v5, -|ttmp15|, -|src_scc|, -|ttmp15| +// GFX12: encoding: [0x05,0x07,0x6d,0xd6,0x7b,0xfa,0xed,0xe1] + +v_maximumminimum_f32 v5, m0, 0.5, m0 +// GFX12: encoding: [0x05,0x00,0x6d,0xd6,0x7d,0xe0,0xf5,0x01] + +v_maximumminimum_f32 v5, |exec_lo|, -1, vcc_hi +// GFX12: encoding: [0x05,0x01,0x6d,0xd6,0x7e,0x82,0xad,0x01] + +v_maximumminimum_f32 v5, -|exec_hi|, null, -|vcc_lo| +// GFX12: encoding: [0x05,0x05,0x6d,0xd6,0x7f,0xf8,0xa8,0xa1] + +v_maximumminimum_f32 v5, null, exec_lo, -|0xaf123456| +// GFX12: encoding: [0x05,0x04,0x6d,0xd6,0x7c,0xfc,0xfc,0x83,0x56,0x34,0x12,0xaf] + +v_maximumminimum_f32 v5, -1, -|exec_hi|, -|src_scc| +// GFX12: encoding: [0x05,0x06,0x6d,0xd6,0xc1,0xfe,0xf4,0xc3] + +v_maximumminimum_f32 v5, 0.5, -m0, 0.5 mul:2 +// GFX12: encoding: [0x05,0x00,0x6d,0xd6,0xf0,0xfa,0xc0,0x4b] + +v_maximumminimum_f32 v5, -src_scc, |vcc_lo|, -1 mul:4 +// GFX12: encoding: [0x05,0x02,0x6d,0xd6,0xfd,0xd4,0x04,0x33] + +v_maximumminimum_f32 v255, -|0xaf123456|, -|vcc_hi|, null clamp div:2 +// GFX12: encoding: [0xff,0x83,0x6d,0xd6,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf] + +v_minimummaximum_f32 v5, v1, v2, s3 +// GFX12: encoding: [0x05,0x00,0x6c,0xd6,0x01,0x05,0x0e,0x00] + +v_minimummaximum_f32 v5, v255, s2, s105 +// GFX12: encoding: [0x05,0x00,0x6c,0xd6,0xff,0x05,0xa4,0x01] + +v_minimummaximum_f32 v5, s1, v255, exec_hi +// GFX12: encoding: [0x05,0x00,0x6c,0xd6,0x01,0xfe,0xff,0x01] + +v_minimummaximum_f32 v5, s105, s105, exec_lo +// GFX12: encoding: [0x05,0x00,0x6c,0xd6,0x69,0xd2,0xf8,0x01] + +v_minimummaximum_f32 v5, vcc_lo, ttmp15, v3 +// GFX12: encoding: [0x05,0x00,0x6c,0xd6,0x6a,0xf6,0x0c,0x04] + +v_minimummaximum_f32 v5, vcc_hi, 0xaf123456, v255 +// GFX12: encoding: [0x05,0x00,0x6c,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] + +v_minimummaximum_f32 v5, -|ttmp15|, -|src_scc|, -|ttmp15| +// GFX12: encoding: [0x05,0x07,0x6c,0xd6,0x7b,0xfa,0xed,0xe1] + +v_minimummaximum_f32 v5, m0, 0.5, m0 +// GFX12: encoding: [0x05,0x00,0x6c,0xd6,0x7d,0xe0,0xf5,0x01] + +v_minimummaximum_f32 v5, |exec_lo|, -1, vcc_hi +// GFX12: encoding: [0x05,0x01,0x6c,0xd6,0x7e,0x82,0xad,0x01] + +v_minimummaximum_f32 v5, -|exec_hi|, null, -|vcc_lo| +// GFX12: encoding: [0x05,0x05,0x6c,0xd6,0x7f,0xf8,0xa8,0xa1] + +v_minimummaximum_f32 v5, null, exec_lo, -|0xaf123456| +// GFX12: encoding: [0x05,0x04,0x6c,0xd6,0x7c,0xfc,0xfc,0x83,0x56,0x34,0x12,0xaf] + +v_minimummaximum_f32 v5, -1, -|exec_hi|, -|src_scc| +// GFX12: encoding: [0x05,0x06,0x6c,0xd6,0xc1,0xfe,0xf4,0xc3] + +v_minimummaximum_f32 v5, 0.5, -m0, 0.5 mul:2 +// GFX12: encoding: 
[0x05,0x00,0x6c,0xd6,0xf0,0xfa,0xc0,0x4b] + +v_minimummaximum_f32 v5, -src_scc, |vcc_lo|, -1 mul:4 +// GFX12: encoding: [0x05,0x02,0x6c,0xd6,0xfd,0xd4,0x04,0x33] + +v_minimummaximum_f32 v255, -|0xaf123456|, -|vcc_hi|, null clamp div:2 +// GFX12: encoding: [0xff,0x83,0x6c,0xd6,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf] + +v_maximumminimum_f16 v5, v1, v2, s3 +// GFX12: encoding: [0x05,0x00,0x6f,0xd6,0x01,0x05,0x0e,0x00] + +v_maximumminimum_f16 v5, v255, s2, s105 +// GFX12: encoding: [0x05,0x00,0x6f,0xd6,0xff,0x05,0xa4,0x01] + +v_maximumminimum_f16 v5, s1, v255, exec_hi +// GFX12: encoding: [0x05,0x00,0x6f,0xd6,0x01,0xfe,0xff,0x01] + +v_maximumminimum_f16 v5, s105, s105, exec_lo +// GFX12: encoding: [0x05,0x00,0x6f,0xd6,0x69,0xd2,0xf8,0x01] + +v_maximumminimum_f16 v5, vcc_lo, ttmp15, v3 +// GFX12: encoding: [0x05,0x00,0x6f,0xd6,0x6a,0xf6,0x0c,0x04] + +v_maximumminimum_f16 v5, vcc_hi, 0xaf12, v255 +// GFX12: encoding: [0x05,0x00,0x6f,0xd6,0x6b,0xfe,0xfd,0x07,0x12,0xaf,0x00,0x00] + +v_maximumminimum_f16 v5, -|ttmp15|, -|src_scc|, -|ttmp15| +// GFX12: encoding: [0x05,0x07,0x6f,0xd6,0x7b,0xfa,0xed,0xe1] + +v_maximumminimum_f16 v5, m0, 0.5, m0 +// GFX12: encoding: [0x05,0x00,0x6f,0xd6,0x7d,0xe0,0xf5,0x01] + +v_maximumminimum_f16 v5, |exec_lo|, -1, vcc_hi +// GFX12: encoding: [0x05,0x01,0x6f,0xd6,0x7e,0x82,0xad,0x01] + +v_maximumminimum_f16 v5, -|exec_hi|, null, -|vcc_lo| +// GFX12: encoding: [0x05,0x05,0x6f,0xd6,0x7f,0xf8,0xa8,0xa1] + +v_maximumminimum_f16 v5, null, exec_lo, -|0xaf12| +// GFX12: encoding: [0x05,0x04,0x6f,0xd6,0x7c,0xfc,0xfc,0x83,0x12,0xaf,0x00,0x00] + +v_maximumminimum_f16 v5, -1, -|exec_hi|, -|src_scc| +// GFX12: encoding: [0x05,0x06,0x6f,0xd6,0xc1,0xfe,0xf4,0xc3] + +v_maximumminimum_f16 v5, 0.5, -m0, 0.5 +// GFX12: encoding: [0x05,0x00,0x6f,0xd6,0xf0,0xfa,0xc0,0x43] + +v_maximumminimum_f16 v5, -src_scc, |vcc_lo|, -1 +// GFX12: encoding: [0x05,0x02,0x6f,0xd6,0xfd,0xd4,0x04,0x23] + +v_maximumminimum_f16 v255, -|0xaf12|, -|vcc_hi|, null clamp +// GFX12: encoding: [0xff,0x83,0x6f,0xd6,0xff,0xd6,0xf0,0x61,0x12,0xaf,0x00,0x00] + +v_minimummaximum_f16 v5, v1, v2, s3 +// GFX12: encoding: [0x05,0x00,0x6e,0xd6,0x01,0x05,0x0e,0x00] + +v_minimummaximum_f16 v5, v255, s2, s105 +// GFX12: encoding: [0x05,0x00,0x6e,0xd6,0xff,0x05,0xa4,0x01] + +v_minimummaximum_f16 v5, s1, v255, exec_hi +// GFX12: encoding: [0x05,0x00,0x6e,0xd6,0x01,0xfe,0xff,0x01] + +v_minimummaximum_f16 v5, s105, s105, exec_lo +// GFX12: encoding: [0x05,0x00,0x6e,0xd6,0x69,0xd2,0xf8,0x01] + +v_minimummaximum_f16 v5, vcc_lo, ttmp15, v3 +// GFX12: encoding: [0x05,0x00,0x6e,0xd6,0x6a,0xf6,0x0c,0x04] + +v_minimummaximum_f16 v5, vcc_hi, 0xaf12, v255 +// GFX12: encoding: [0x05,0x00,0x6e,0xd6,0x6b,0xfe,0xfd,0x07,0x12,0xaf,0x00,0x00] + +v_minimummaximum_f16 v5, -|ttmp15|, -|src_scc|, -|ttmp15| +// GFX12: encoding: [0x05,0x07,0x6e,0xd6,0x7b,0xfa,0xed,0xe1] + +v_minimummaximum_f16 v5, m0, 0.5, m0 +// GFX12: encoding: [0x05,0x00,0x6e,0xd6,0x7d,0xe0,0xf5,0x01] + +v_minimummaximum_f16 v5, |exec_lo|, -1, vcc_hi +// GFX12: encoding: [0x05,0x01,0x6e,0xd6,0x7e,0x82,0xad,0x01] + +v_minimummaximum_f16 v5, -|exec_hi|, null, -|vcc_lo| +// GFX12: encoding: [0x05,0x05,0x6e,0xd6,0x7f,0xf8,0xa8,0xa1] + +v_minimummaximum_f16 v5, null, exec_lo, -|0xaf12| +// GFX12: encoding: [0x05,0x04,0x6e,0xd6,0x7c,0xfc,0xfc,0x83,0x12,0xaf,0x00,0x00] + +v_minimummaximum_f16 v5, -1, -|exec_hi|, -|src_scc| +// GFX12: encoding: [0x05,0x06,0x6e,0xd6,0xc1,0xfe,0xf4,0xc3] + +v_minimummaximum_f16 v5, 0.5, -m0, 0.5 +// GFX12: encoding: [0x05,0x00,0x6e,0xd6,0xf0,0xfa,0xc0,0x43] 
+ +v_minimummaximum_f16 v5, -src_scc, |vcc_lo|, -1 +// GFX12: encoding: [0x05,0x02,0x6e,0xd6,0xfd,0xd4,0x04,0x23] + +v_s_exp_f32 s5, s1 +// GFX12: encoding: [0x05,0x00,0x80,0xd6,0x01,0x00,0x00,0x00] + +v_s_exp_f32 s5, s105 +// GFX12: encoding: [0x05,0x00,0x80,0xd6,0x69,0x00,0x00,0x00] + +v_s_exp_f32 s5, vcc_lo +// GFX12: encoding: [0x05,0x00,0x80,0xd6,0x6a,0x00,0x00,0x00] + +v_s_exp_f32 s5, vcc_hi +// GFX12: encoding: [0x05,0x00,0x80,0xd6,0x6b,0x00,0x00,0x00] + +v_s_exp_f32 s5, ttmp15 +// GFX12: encoding: [0x05,0x00,0x80,0xd6,0x7b,0x00,0x00,0x00] + +v_s_exp_f32 s5, m0 +// GFX12: encoding: [0x05,0x00,0x80,0xd6,0x7d,0x00,0x00,0x00] + +v_s_exp_f32 s5, exec_lo +// GFX12: encoding: [0x05,0x00,0x80,0xd6,0x7e,0x00,0x00,0x00] + +v_s_exp_f32 s5, exec_hi +// GFX12: encoding: [0x05,0x00,0x80,0xd6,0x7f,0x00,0x00,0x00] + +v_s_exp_f32 s5, null +// GFX12: encoding: [0x05,0x00,0x80,0xd6,0x7c,0x00,0x00,0x00] + +v_s_exp_f32 s5, -1 +// GFX12: encoding: [0x05,0x00,0x80,0xd6,0xc1,0x00,0x00,0x00] + +v_s_exp_f32 s5, 0.5 +// GFX12: encoding: [0x05,0x00,0x80,0xd6,0xf0,0x00,0x00,0x00] + +v_s_exp_f32 s5, src_scc +// GFX12: encoding: [0x05,0x00,0x80,0xd6,0xfd,0x00,0x00,0x00] + +v_s_exp_f32 s105, 0xaf123456 +// GFX12: encoding: [0x69,0x00,0x80,0xd6,0xff,0x00,0x00,0x00,0x56,0x34,0x12,0xaf] + +v_s_exp_f32 s5, -s1 +// GFX12: encoding: [0x05,0x00,0x80,0xd6,0x01,0x00,0x00,0x20] + +v_s_exp_f32 s5, |s1| +// GFX12: encoding: [0x05,0x01,0x80,0xd6,0x01,0x00,0x00,0x00] + +v_s_exp_f32 s5, s1 clamp +// GFX12: encoding: [0x05,0x80,0x80,0xd6,0x01,0x00,0x00,0x00] + +v_s_exp_f32 s5, s1 mul:2 +// GFX12: encoding: [0x05,0x00,0x80,0xd6,0x01,0x00,0x00,0x08] + +v_s_exp_f32 s5, s1 mul:4 +// GFX12: encoding: [0x05,0x00,0x80,0xd6,0x01,0x00,0x00,0x10] + +v_s_exp_f32 s5, s1 div:2 +// GFX12: encoding: [0x05,0x00,0x80,0xd6,0x01,0x00,0x00,0x18] + +v_s_exp_f16 s5, s1 +// GFX12: encoding: [0x05,0x00,0x81,0xd6,0x01,0x00,0x00,0x00] + +v_s_exp_f16 s5, s105 +// GFX12: encoding: [0x05,0x00,0x81,0xd6,0x69,0x00,0x00,0x00] + +v_s_exp_f16 s5, vcc_lo +// GFX12: encoding: [0x05,0x00,0x81,0xd6,0x6a,0x00,0x00,0x00] + +v_s_exp_f16 s5, vcc_hi +// GFX12: encoding: [0x05,0x00,0x81,0xd6,0x6b,0x00,0x00,0x00] + +v_s_exp_f16 s5, ttmp15 +// GFX12: encoding: [0x05,0x00,0x81,0xd6,0x7b,0x00,0x00,0x00] + +v_s_exp_f16 s5, m0 +// GFX12: encoding: [0x05,0x00,0x81,0xd6,0x7d,0x00,0x00,0x00] + +v_s_exp_f16 s5, exec_lo +// GFX12: encoding: [0x05,0x00,0x81,0xd6,0x7e,0x00,0x00,0x00] + +v_s_exp_f16 s5, exec_hi +// GFX12: encoding: [0x05,0x00,0x81,0xd6,0x7f,0x00,0x00,0x00] + +v_s_exp_f16 s5, null +// GFX12: encoding: [0x05,0x00,0x81,0xd6,0x7c,0x00,0x00,0x00] + +v_s_exp_f16 s5, -1 +// GFX12: encoding: [0x05,0x00,0x81,0xd6,0xc1,0x00,0x00,0x00] + +v_s_exp_f16 s5, 0.5 +// GFX12: encoding: [0x05,0x00,0x81,0xd6,0xf0,0x00,0x00,0x00] + +v_s_exp_f16 s5, src_scc +// GFX12: encoding: [0x05,0x00,0x81,0xd6,0xfd,0x00,0x00,0x00] + +v_s_exp_f16 s105, 0xaf12 +// GFX12: encoding: [0x69,0x00,0x81,0xd6,0xff,0x00,0x00,0x00,0x12,0xaf,0x00,0x00] + +v_s_exp_f16 s5, -s1 +// GFX12: encoding: [0x05,0x00,0x81,0xd6,0x01,0x00,0x00,0x20] + +v_s_exp_f16 s5, |s1| +// GFX12: encoding: [0x05,0x01,0x81,0xd6,0x01,0x00,0x00,0x00] + +v_s_exp_f16 s5, s1 clamp +// GFX12: encoding: [0x05,0x80,0x81,0xd6,0x01,0x00,0x00,0x00] + +v_s_exp_f16 s5, s1 mul:2 +// GFX12: encoding: [0x05,0x00,0x81,0xd6,0x01,0x00,0x00,0x08] + +v_s_exp_f16 s5, s1 mul:4 +// GFX12: encoding: [0x05,0x00,0x81,0xd6,0x01,0x00,0x00,0x10] + +v_s_exp_f16 s5, s1 div:2 +// GFX12: encoding: [0x05,0x00,0x81,0xd6,0x01,0x00,0x00,0x18] + +v_s_log_f32 s5, s1 +// GFX12: 
encoding: [0x05,0x00,0x82,0xd6,0x01,0x00,0x00,0x00] + +v_s_log_f32 s5, s105 +// GFX12: encoding: [0x05,0x00,0x82,0xd6,0x69,0x00,0x00,0x00] + +v_s_log_f32 s5, vcc_lo +// GFX12: encoding: [0x05,0x00,0x82,0xd6,0x6a,0x00,0x00,0x00] + +v_s_log_f32 s5, vcc_hi +// GFX12: encoding: [0x05,0x00,0x82,0xd6,0x6b,0x00,0x00,0x00] + +v_s_log_f32 s5, ttmp15 +// GFX12: encoding: [0x05,0x00,0x82,0xd6,0x7b,0x00,0x00,0x00] + +v_s_log_f32 s5, m0 +// GFX12: encoding: [0x05,0x00,0x82,0xd6,0x7d,0x00,0x00,0x00] + +v_s_log_f32 s5, exec_lo +// GFX12: encoding: [0x05,0x00,0x82,0xd6,0x7e,0x00,0x00,0x00] + +v_s_log_f32 s5, exec_hi +// GFX12: encoding: [0x05,0x00,0x82,0xd6,0x7f,0x00,0x00,0x00] + +v_s_log_f32 s5, null +// GFX12: encoding: [0x05,0x00,0x82,0xd6,0x7c,0x00,0x00,0x00] + +v_s_log_f32 s5, -1 +// GFX12: encoding: [0x05,0x00,0x82,0xd6,0xc1,0x00,0x00,0x00] + +v_s_log_f32 s5, 0.5 +// GFX12: encoding: [0x05,0x00,0x82,0xd6,0xf0,0x00,0x00,0x00] + +v_s_log_f32 s5, src_scc +// GFX12: encoding: [0x05,0x00,0x82,0xd6,0xfd,0x00,0x00,0x00] + +v_s_log_f32 s105, 0xaf123456 +// GFX12: encoding: [0x69,0x00,0x82,0xd6,0xff,0x00,0x00,0x00,0x56,0x34,0x12,0xaf] + +v_s_log_f32 s5, -s1 +// GFX12: encoding: [0x05,0x00,0x82,0xd6,0x01,0x00,0x00,0x20] + +v_s_log_f32 s5, |s1| +// GFX12: encoding: [0x05,0x01,0x82,0xd6,0x01,0x00,0x00,0x00] + +v_s_log_f32 s5, s1 clamp +// GFX12: encoding: [0x05,0x80,0x82,0xd6,0x01,0x00,0x00,0x00] + +v_s_log_f32 s5, s1 mul:2 +// GFX12: encoding: [0x05,0x00,0x82,0xd6,0x01,0x00,0x00,0x08] + +v_s_log_f32 s5, s1 mul:4 +// GFX12: encoding: [0x05,0x00,0x82,0xd6,0x01,0x00,0x00,0x10] + +v_s_log_f32 s5, s1 div:2 +// GFX12: encoding: [0x05,0x00,0x82,0xd6,0x01,0x00,0x00,0x18] + +v_s_log_f16 s5, s1 +// GFX12: encoding: [0x05,0x00,0x83,0xd6,0x01,0x00,0x00,0x00] + +v_s_log_f16 s5, s105 +// GFX12: encoding: [0x05,0x00,0x83,0xd6,0x69,0x00,0x00,0x00] + +v_s_log_f16 s5, vcc_lo +// GFX12: encoding: [0x05,0x00,0x83,0xd6,0x6a,0x00,0x00,0x00] + +v_s_log_f16 s5, vcc_hi +// GFX12: encoding: [0x05,0x00,0x83,0xd6,0x6b,0x00,0x00,0x00] + +v_s_log_f16 s5, ttmp15 +// GFX12: encoding: [0x05,0x00,0x83,0xd6,0x7b,0x00,0x00,0x00] + +v_s_log_f16 s5, m0 +// GFX12: encoding: [0x05,0x00,0x83,0xd6,0x7d,0x00,0x00,0x00] + +v_s_log_f16 s5, exec_lo +// GFX12: encoding: [0x05,0x00,0x83,0xd6,0x7e,0x00,0x00,0x00] + +v_s_log_f16 s5, exec_hi +// GFX12: encoding: [0x05,0x00,0x83,0xd6,0x7f,0x00,0x00,0x00] + +v_s_log_f16 s5, null +// GFX12: encoding: [0x05,0x00,0x83,0xd6,0x7c,0x00,0x00,0x00] + +v_s_log_f16 s5, -1 +// GFX12: encoding: [0x05,0x00,0x83,0xd6,0xc1,0x00,0x00,0x00] + +v_s_log_f16 s5, 0.5 +// GFX12: encoding: [0x05,0x00,0x83,0xd6,0xf0,0x00,0x00,0x00] + +v_s_log_f16 s5, src_scc +// GFX12: encoding: [0x05,0x00,0x83,0xd6,0xfd,0x00,0x00,0x00] + +v_s_log_f16 s105, 0xaf12 +// GFX12: encoding: [0x69,0x00,0x83,0xd6,0xff,0x00,0x00,0x00,0x12,0xaf,0x00,0x00] + +v_s_log_f16 s5, -s1 +// GFX12: encoding: [0x05,0x00,0x83,0xd6,0x01,0x00,0x00,0x20] + +v_s_log_f16 s5, |s1| +// GFX12: encoding: [0x05,0x01,0x83,0xd6,0x01,0x00,0x00,0x00] + +v_s_log_f16 s5, s1 clamp +// GFX12: encoding: [0x05,0x80,0x83,0xd6,0x01,0x00,0x00,0x00] + +v_s_log_f16 s5, s1 mul:2 +// GFX12: encoding: [0x05,0x00,0x83,0xd6,0x01,0x00,0x00,0x08] + +v_s_log_f16 s5, s1 mul:4 +// GFX12: encoding: [0x05,0x00,0x83,0xd6,0x01,0x00,0x00,0x10] + +v_s_log_f16 s5, s1 div:2 +// GFX12: encoding: [0x05,0x00,0x83,0xd6,0x01,0x00,0x00,0x18] + +v_s_rcp_f32 s5, s1 +// GFX12: encoding: [0x05,0x00,0x84,0xd6,0x01,0x00,0x00,0x00] + +v_s_rcp_f32 s5, s105 +// GFX12: encoding: [0x05,0x00,0x84,0xd6,0x69,0x00,0x00,0x00] + 
+v_s_rcp_f32 s5, vcc_lo +// GFX12: encoding: [0x05,0x00,0x84,0xd6,0x6a,0x00,0x00,0x00] + +v_s_rcp_f32 s5, vcc_hi +// GFX12: encoding: [0x05,0x00,0x84,0xd6,0x6b,0x00,0x00,0x00] + +v_s_rcp_f32 s5, ttmp15 +// GFX12: encoding: [0x05,0x00,0x84,0xd6,0x7b,0x00,0x00,0x00] + +v_s_rcp_f32 s5, m0 +// GFX12: encoding: [0x05,0x00,0x84,0xd6,0x7d,0x00,0x00,0x00] + +v_s_rcp_f32 s5, exec_lo +// GFX12: encoding: [0x05,0x00,0x84,0xd6,0x7e,0x00,0x00,0x00] + +v_s_rcp_f32 s5, exec_hi +// GFX12: encoding: [0x05,0x00,0x84,0xd6,0x7f,0x00,0x00,0x00] + +v_s_rcp_f32 s5, null +// GFX12: encoding: [0x05,0x00,0x84,0xd6,0x7c,0x00,0x00,0x00] + +v_s_rcp_f32 s5, -1 +// GFX12: encoding: [0x05,0x00,0x84,0xd6,0xc1,0x00,0x00,0x00] + +v_s_rcp_f32 s5, 0.5 +// GFX12: encoding: [0x05,0x00,0x84,0xd6,0xf0,0x00,0x00,0x00] + +v_s_rcp_f32 s5, src_scc +// GFX12: encoding: [0x05,0x00,0x84,0xd6,0xfd,0x00,0x00,0x00] + +v_s_rcp_f32 s105, 0xaf123456 +// GFX12: encoding: [0x69,0x00,0x84,0xd6,0xff,0x00,0x00,0x00,0x56,0x34,0x12,0xaf] + +v_s_rcp_f32 s5, -s1 +// GFX12: encoding: [0x05,0x00,0x84,0xd6,0x01,0x00,0x00,0x20] + +v_s_rcp_f32 s5, |s1| +// GFX12: encoding: [0x05,0x01,0x84,0xd6,0x01,0x00,0x00,0x00] + +v_s_rcp_f32 s5, s1 clamp +// GFX12: encoding: [0x05,0x80,0x84,0xd6,0x01,0x00,0x00,0x00] + +v_s_rcp_f32 s5, s1 mul:2 +// GFX12: encoding: [0x05,0x00,0x84,0xd6,0x01,0x00,0x00,0x08] + +v_s_rcp_f32 s5, s1 mul:4 +// GFX12: encoding: [0x05,0x00,0x84,0xd6,0x01,0x00,0x00,0x10] + +v_s_rcp_f32 s5, s1 div:2 +// GFX12: encoding: [0x05,0x00,0x84,0xd6,0x01,0x00,0x00,0x18] + +v_s_rcp_f16 s5, s1 +// GFX12: encoding: [0x05,0x00,0x85,0xd6,0x01,0x00,0x00,0x00] + +v_s_rcp_f16 s5, s105 +// GFX12: encoding: [0x05,0x00,0x85,0xd6,0x69,0x00,0x00,0x00] + +v_s_rcp_f16 s5, vcc_lo +// GFX12: encoding: [0x05,0x00,0x85,0xd6,0x6a,0x00,0x00,0x00] + +v_s_rcp_f16 s5, vcc_hi +// GFX12: encoding: [0x05,0x00,0x85,0xd6,0x6b,0x00,0x00,0x00] + +v_s_rcp_f16 s5, ttmp15 +// GFX12: encoding: [0x05,0x00,0x85,0xd6,0x7b,0x00,0x00,0x00] + +v_s_rcp_f16 s5, m0 +// GFX12: encoding: [0x05,0x00,0x85,0xd6,0x7d,0x00,0x00,0x00] + +v_s_rcp_f16 s5, exec_lo +// GFX12: encoding: [0x05,0x00,0x85,0xd6,0x7e,0x00,0x00,0x00] + +v_s_rcp_f16 s5, exec_hi +// GFX12: encoding: [0x05,0x00,0x85,0xd6,0x7f,0x00,0x00,0x00] + +v_s_rcp_f16 s5, null +// GFX12: encoding: [0x05,0x00,0x85,0xd6,0x7c,0x00,0x00,0x00] + +v_s_rcp_f16 s5, -1 +// GFX12: encoding: [0x05,0x00,0x85,0xd6,0xc1,0x00,0x00,0x00] + +v_s_rcp_f16 s5, 0.5 +// GFX12: encoding: [0x05,0x00,0x85,0xd6,0xf0,0x00,0x00,0x00] + +v_s_rcp_f16 s5, src_scc +// GFX12: encoding: [0x05,0x00,0x85,0xd6,0xfd,0x00,0x00,0x00] + +v_s_rcp_f16 s105, 0xaf12 +// GFX12: encoding: [0x69,0x00,0x85,0xd6,0xff,0x00,0x00,0x00,0x12,0xaf,0x00,0x00] + +v_s_rcp_f16 s5, -s1 +// GFX12: encoding: [0x05,0x00,0x85,0xd6,0x01,0x00,0x00,0x20] + +v_s_rcp_f16 s5, |s1| +// GFX12: encoding: [0x05,0x01,0x85,0xd6,0x01,0x00,0x00,0x00] + +v_s_rcp_f16 s5, s1 clamp +// GFX12: encoding: [0x05,0x80,0x85,0xd6,0x01,0x00,0x00,0x00] + +v_s_rcp_f16 s5, s1 mul:2 +// GFX12: encoding: [0x05,0x00,0x85,0xd6,0x01,0x00,0x00,0x08] + +v_s_rcp_f16 s5, s1 mul:4 +// GFX12: encoding: [0x05,0x00,0x85,0xd6,0x01,0x00,0x00,0x10] + +v_s_rcp_f16 s5, s1 div:2 +// GFX12: encoding: [0x05,0x00,0x85,0xd6,0x01,0x00,0x00,0x18] + +v_s_rsq_f32 s5, s1 +// GFX12: encoding: [0x05,0x00,0x86,0xd6,0x01,0x00,0x00,0x00] + +v_s_rsq_f32 s5, s105 +// GFX12: encoding: [0x05,0x00,0x86,0xd6,0x69,0x00,0x00,0x00] + +v_s_rsq_f32 s5, vcc_lo +// GFX12: encoding: [0x05,0x00,0x86,0xd6,0x6a,0x00,0x00,0x00] + +v_s_rsq_f32 s5, vcc_hi +// GFX12: encoding: 
[0x05,0x00,0x86,0xd6,0x6b,0x00,0x00,0x00] + +v_s_rsq_f32 s5, ttmp15 +// GFX12: encoding: [0x05,0x00,0x86,0xd6,0x7b,0x00,0x00,0x00] + +v_s_rsq_f32 s5, m0 +// GFX12: encoding: [0x05,0x00,0x86,0xd6,0x7d,0x00,0x00,0x00] + +v_s_rsq_f32 s5, exec_lo +// GFX12: encoding: [0x05,0x00,0x86,0xd6,0x7e,0x00,0x00,0x00] + +v_s_rsq_f32 s5, exec_hi +// GFX12: encoding: [0x05,0x00,0x86,0xd6,0x7f,0x00,0x00,0x00] + +v_s_rsq_f32 s5, null +// GFX12: encoding: [0x05,0x00,0x86,0xd6,0x7c,0x00,0x00,0x00] + +v_s_rsq_f32 s5, -1 +// GFX12: encoding: [0x05,0x00,0x86,0xd6,0xc1,0x00,0x00,0x00] + +v_s_rsq_f32 s5, 0.5 +// GFX12: encoding: [0x05,0x00,0x86,0xd6,0xf0,0x00,0x00,0x00] + +v_s_rsq_f32 s5, src_scc +// GFX12: encoding: [0x05,0x00,0x86,0xd6,0xfd,0x00,0x00,0x00] + +v_s_rsq_f32 s105, 0xaf123456 +// GFX12: encoding: [0x69,0x00,0x86,0xd6,0xff,0x00,0x00,0x00,0x56,0x34,0x12,0xaf] + +v_s_rsq_f32 s5, -s1 +// GFX12: encoding: [0x05,0x00,0x86,0xd6,0x01,0x00,0x00,0x20] + +v_s_rsq_f32 s5, |s1| +// GFX12: encoding: [0x05,0x01,0x86,0xd6,0x01,0x00,0x00,0x00] + +v_s_rsq_f32 s5, s1 clamp +// GFX12: encoding: [0x05,0x80,0x86,0xd6,0x01,0x00,0x00,0x00] + +v_s_rsq_f32 s5, s1 mul:2 +// GFX12: encoding: [0x05,0x00,0x86,0xd6,0x01,0x00,0x00,0x08] + +v_s_rsq_f32 s5, s1 mul:4 +// GFX12: encoding: [0x05,0x00,0x86,0xd6,0x01,0x00,0x00,0x10] + +v_s_rsq_f32 s5, s1 div:2 +// GFX12: encoding: [0x05,0x00,0x86,0xd6,0x01,0x00,0x00,0x18] + +v_s_rsq_f16 s5, s1 +// GFX12: encoding: [0x05,0x00,0x87,0xd6,0x01,0x00,0x00,0x00] + +v_s_rsq_f16 s5, s105 +// GFX12: encoding: [0x05,0x00,0x87,0xd6,0x69,0x00,0x00,0x00] + +v_s_rsq_f16 s5, vcc_lo +// GFX12: encoding: [0x05,0x00,0x87,0xd6,0x6a,0x00,0x00,0x00] + +v_s_rsq_f16 s5, vcc_hi +// GFX12: encoding: [0x05,0x00,0x87,0xd6,0x6b,0x00,0x00,0x00] + +v_s_rsq_f16 s5, ttmp15 +// GFX12: encoding: [0x05,0x00,0x87,0xd6,0x7b,0x00,0x00,0x00] + +v_s_rsq_f16 s5, m0 +// GFX12: encoding: [0x05,0x00,0x87,0xd6,0x7d,0x00,0x00,0x00] + +v_s_rsq_f16 s5, exec_lo +// GFX12: encoding: [0x05,0x00,0x87,0xd6,0x7e,0x00,0x00,0x00] + +v_s_rsq_f16 s5, exec_hi +// GFX12: encoding: [0x05,0x00,0x87,0xd6,0x7f,0x00,0x00,0x00] + +v_s_rsq_f16 s5, null +// GFX12: encoding: [0x05,0x00,0x87,0xd6,0x7c,0x00,0x00,0x00] + +v_s_rsq_f16 s5, -1 +// GFX12: encoding: [0x05,0x00,0x87,0xd6,0xc1,0x00,0x00,0x00] + +v_s_rsq_f16 s5, 0.5 +// GFX12: encoding: [0x05,0x00,0x87,0xd6,0xf0,0x00,0x00,0x00] + +v_s_rsq_f16 s5, src_scc +// GFX12: encoding: [0x05,0x00,0x87,0xd6,0xfd,0x00,0x00,0x00] + +v_s_rsq_f16 s105, 0xaf12 +// GFX12: encoding: [0x69,0x00,0x87,0xd6,0xff,0x00,0x00,0x00,0x12,0xaf,0x00,0x00] + +v_s_rsq_f16 s5, -s1 +// GFX12: encoding: [0x05,0x00,0x87,0xd6,0x01,0x00,0x00,0x20] + +v_s_rsq_f16 s5, |s1| +// GFX12: encoding: [0x05,0x01,0x87,0xd6,0x01,0x00,0x00,0x00] + +v_s_rsq_f16 s5, s1 clamp +// GFX12: encoding: [0x05,0x80,0x87,0xd6,0x01,0x00,0x00,0x00] + +v_s_rsq_f16 s5, s1 mul:2 +// GFX12: encoding: [0x05,0x00,0x87,0xd6,0x01,0x00,0x00,0x08] + +v_s_rsq_f16 s5, s1 mul:4 +// GFX12: encoding: [0x05,0x00,0x87,0xd6,0x01,0x00,0x00,0x10] + +v_s_rsq_f16 s5, s1 div:2 +// GFX12: encoding: [0x05,0x00,0x87,0xd6,0x01,0x00,0x00,0x18] + +v_s_sqrt_f32 s5, s1 +// GFX12: encoding: [0x05,0x00,0x88,0xd6,0x01,0x00,0x00,0x00] + +v_s_sqrt_f32 s5, s105 +// GFX12: encoding: [0x05,0x00,0x88,0xd6,0x69,0x00,0x00,0x00] + +v_s_sqrt_f32 s5, vcc_lo +// GFX12: encoding: [0x05,0x00,0x88,0xd6,0x6a,0x00,0x00,0x00] + +v_s_sqrt_f32 s5, vcc_hi +// GFX12: encoding: [0x05,0x00,0x88,0xd6,0x6b,0x00,0x00,0x00] + +v_s_sqrt_f32 s5, ttmp15 +// GFX12: encoding: [0x05,0x00,0x88,0xd6,0x7b,0x00,0x00,0x00] + 
+v_s_sqrt_f32 s5, m0 +// GFX12: encoding: [0x05,0x00,0x88,0xd6,0x7d,0x00,0x00,0x00] + +v_s_sqrt_f32 s5, exec_lo +// GFX12: encoding: [0x05,0x00,0x88,0xd6,0x7e,0x00,0x00,0x00] + +v_s_sqrt_f32 s5, exec_hi +// GFX12: encoding: [0x05,0x00,0x88,0xd6,0x7f,0x00,0x00,0x00] + +v_s_sqrt_f32 s5, null +// GFX12: encoding: [0x05,0x00,0x88,0xd6,0x7c,0x00,0x00,0x00] + +v_s_sqrt_f32 s5, -1 +// GFX12: encoding: [0x05,0x00,0x88,0xd6,0xc1,0x00,0x00,0x00] + +v_s_sqrt_f32 s5, 0.5 +// GFX12: encoding: [0x05,0x00,0x88,0xd6,0xf0,0x00,0x00,0x00] + +v_s_sqrt_f32 s5, src_scc +// GFX12: encoding: [0x05,0x00,0x88,0xd6,0xfd,0x00,0x00,0x00] + +v_s_sqrt_f32 s105, 0xaf123456 +// GFX12: encoding: [0x69,0x00,0x88,0xd6,0xff,0x00,0x00,0x00,0x56,0x34,0x12,0xaf] + +v_s_sqrt_f32 s5, -s1 +// GFX12: encoding: [0x05,0x00,0x88,0xd6,0x01,0x00,0x00,0x20] + +v_s_sqrt_f32 s5, |s1| +// GFX12: encoding: [0x05,0x01,0x88,0xd6,0x01,0x00,0x00,0x00] + +v_s_sqrt_f32 s5, s1 clamp +// GFX12: encoding: [0x05,0x80,0x88,0xd6,0x01,0x00,0x00,0x00] + +v_s_sqrt_f32 s5, s1 mul:2 +// GFX12: encoding: [0x05,0x00,0x88,0xd6,0x01,0x00,0x00,0x08] + +v_s_sqrt_f32 s5, s1 mul:4 +// GFX12: encoding: [0x05,0x00,0x88,0xd6,0x01,0x00,0x00,0x10] + +v_s_sqrt_f32 s5, s1 div:2 +// GFX12: encoding: [0x05,0x00,0x88,0xd6,0x01,0x00,0x00,0x18] + +v_s_sqrt_f16 s5, s1 +// GFX12: encoding: [0x05,0x00,0x89,0xd6,0x01,0x00,0x00,0x00] + +v_s_sqrt_f16 s5, s105 +// GFX12: encoding: [0x05,0x00,0x89,0xd6,0x69,0x00,0x00,0x00] + +v_s_sqrt_f16 s5, vcc_lo +// GFX12: encoding: [0x05,0x00,0x89,0xd6,0x6a,0x00,0x00,0x00] + +v_s_sqrt_f16 s5, vcc_hi +// GFX12: encoding: [0x05,0x00,0x89,0xd6,0x6b,0x00,0x00,0x00] + +v_s_sqrt_f16 s5, ttmp15 +// GFX12: encoding: [0x05,0x00,0x89,0xd6,0x7b,0x00,0x00,0x00] + +v_s_sqrt_f16 s5, m0 +// GFX12: encoding: [0x05,0x00,0x89,0xd6,0x7d,0x00,0x00,0x00] + +v_s_sqrt_f16 s5, exec_lo +// GFX12: encoding: [0x05,0x00,0x89,0xd6,0x7e,0x00,0x00,0x00] + +v_s_sqrt_f16 s5, exec_hi +// GFX12: encoding: [0x05,0x00,0x89,0xd6,0x7f,0x00,0x00,0x00] + +v_s_sqrt_f16 s5, null +// GFX12: encoding: [0x05,0x00,0x89,0xd6,0x7c,0x00,0x00,0x00] + +v_s_sqrt_f16 s5, -1 +// GFX12: encoding: [0x05,0x00,0x89,0xd6,0xc1,0x00,0x00,0x00] + +v_s_sqrt_f16 s5, 0.5 +// GFX12: encoding: [0x05,0x00,0x89,0xd6,0xf0,0x00,0x00,0x00] + +v_s_sqrt_f16 s5, src_scc +// GFX12: encoding: [0x05,0x00,0x89,0xd6,0xfd,0x00,0x00,0x00] + +v_s_sqrt_f16 s105, 0xaf12 +// GFX12: encoding: [0x69,0x00,0x89,0xd6,0xff,0x00,0x00,0x00,0x12,0xaf,0x00,0x00] + +v_s_sqrt_f16 s5, -s1 +// GFX12: encoding: [0x05,0x00,0x89,0xd6,0x01,0x00,0x00,0x20] + +v_s_sqrt_f16 s5, |s1| +// GFX12: encoding: [0x05,0x01,0x89,0xd6,0x01,0x00,0x00,0x00] + +v_s_sqrt_f16 s5, s1 clamp +// GFX12: encoding: [0x05,0x80,0x89,0xd6,0x01,0x00,0x00,0x00] + +v_s_sqrt_f16 s5, s1 mul:2 +// GFX12: encoding: [0x05,0x00,0x89,0xd6,0x01,0x00,0x00,0x08] + +v_s_sqrt_f16 s5, s1 mul:4 +// GFX12: encoding: [0x05,0x00,0x89,0xd6,0x01,0x00,0x00,0x10] + +v_s_sqrt_f16 s5, s1 div:2 +// GFX12: encoding: [0x05,0x00,0x89,0xd6,0x01,0x00,0x00,0x18] diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop3.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop3.s index 484e73da199b3..3e99a6120bfdd 100644 --- a/llvm/test/MC/AMDGPU/gfx12_asm_vop3.s +++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop3.s @@ -1,7 +1,7 @@ -// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32 -show-encoding %s | FileCheck --check-prefixes=GFX12,W32 %s -// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64 -show-encoding %s | FileCheck --check-prefixes=GFX12,W64 %s -// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 
-mattr=+wavefrontsize32 %s 2>&1 | FileCheck --check-prefix=W32-ERR --implicit-check-not=error: %s -// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64 %s 2>&1 | FileCheck --check-prefix=W64-ERR --implicit-check-not=error: %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,+real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX12,W32 %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,+real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX12,W64 %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,+real-true16 %s 2>&1 | FileCheck --check-prefix=W32-ERR --implicit-check-not=error: %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,+real-true16 %s 2>&1 | FileCheck --check-prefix=W64-ERR --implicit-check-not=error: %s v_add3_u32 v5, v1, v2, s3 // GFX12: encoding: [0x05,0x00,0x55,0xd6,0x01,0x05,0x0e,0x00] diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp16-fake16.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp16-fake16.s new file mode 100644 index 0000000000000..9fe555fa46706 --- /dev/null +++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp16-fake16.s @@ -0,0 +1,5764 @@ +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,-real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX12,W32 %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,-real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX12,W64 %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,-real-true16 %s 2>&1 | FileCheck --check-prefixes=GFX12-ERR,W32-ERR --implicit-check-not=error: %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,-real-true16 %s 2>&1 | FileCheck --check-prefixes=GFX12-ERR,W64-ERR --implicit-check-not=error: %s + +v_add3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x55,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_add3_u32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x55,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_add3_u32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x55,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_add3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX12: [0x05,0x00,0x55,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_add3_u32_e64_dpp v5, v1, v2, v3 row_mirror +// GFX12: [0x05,0x00,0x55,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_add3_u32_e64_dpp v5, v1, v2, v255 row_half_mirror +// GFX12: [0x05,0x00,0x55,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] + +v_add3_u32_e64_dpp v5, v1, v2, s105 row_shl:1 +// GFX12: [0x05,0x00,0x55,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] + +v_add3_u32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 +// GFX12: [0x05,0x00,0x55,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] + +v_add3_u32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 +// GFX12: [0x05,0x00,0x55,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] + +v_add3_u32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 +// GFX12: [0x05,0x00,0x55,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] + +v_add3_u32_e64_dpp v5, v1, v2, exec_hi row_ror:1 +// GFX12: [0x05,0x00,0x55,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] + +v_add3_u32_e64_dpp v5, v1, v2, exec_lo row_ror:15 +// GFX12: [0x05,0x00,0x55,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] + +v_add3_u32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: [0x05,0x00,0x55,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] + +v_add3_u32_e64_dpp v5, v1, v2, -1 
row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: [0x05,0x00,0x55,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] + +v_add3_u32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: [0x05,0x00,0x55,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13] + +v_add3_u32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: [0xff,0x00,0x55,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] + +v_add_co_u32_e64_dpp v5, s6, v1, v2 quad_perm:[3,2,1,0] +// W32: [0x05,0x06,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_u32_e64_dpp v5, s6, v1, v2 quad_perm:[0,1,2,3] +// W32: [0x05,0x06,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_u32_e64_dpp v5, s6, v1, v2 row_mirror +// W32: [0x05,0x06,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_u32_e64_dpp v5, s6, v1, s2 row_mirror +// W32: [0x05,0x06,0x00,0xd7,0xfa,0x04,0x00,0x00,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_u32_e64_dpp v5, s6, v1, v2 row_half_mirror +// W32: [0x05,0x06,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_u32_e64_dpp v5, s6, v1, v2 row_shl:1 +// W32: [0x05,0x06,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_u32_e64_dpp v5, s6, v1, v2 row_shl:15 +// W32: [0x05,0x06,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_u32_e64_dpp v5, s6, v1, v2 row_shr:1 +// W32: [0x05,0x06,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_u32_e64_dpp v5, s6, v1, v2 row_shr:15 +// W32: [0x05,0x06,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_u32_e64_dpp v5, s6, v1, v2 row_ror:1 +// W32: [0x05,0x06,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_u32_e64_dpp v5, s105, v1, v2 row_ror:15 +// W32: [0x05,0x69,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_u32_e64_dpp v5, vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W32: [0x05,0x6a,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_u32_e64_dpp v5, vcc_hi, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W32: [0x05,0x6b,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_u32_e64_dpp v5, ttmp15, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W32: [0x05,0x7b,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_u32_e64_dpp v5, s[12:13], v1, v2 quad_perm:[3,2,1,0] +// W64: [0x05,0x0c,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: 
invalid operand for instruction + +v_add_co_u32_e64_dpp v5, s[12:13], v1, v2 quad_perm:[0,1,2,3] +// W64: [0x05,0x0c,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_u32_e64_dpp v5, s[12:13], v1, v2 row_mirror +// W64: [0x05,0x0c,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_u32_e64_dpp v5, s[12:13], v1, v2 row_half_mirror +// W64: [0x05,0x0c,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_u32_e64_dpp v5, s[12:13], v1, s2 row_half_mirror +// W64: [0x05,0x0c,0x00,0xd7,0xfa,0x04,0x00,0x00,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_u32_e64_dpp v5, s[12:13], v1, v2 row_shl:1 +// W64: [0x05,0x0c,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_u32_e64_dpp v5, s[12:13], v1, v2 row_shl:15 +// W64: [0x05,0x0c,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_u32_e64_dpp v5, s[12:13], v1, v2 row_shr:1 +// W64: [0x05,0x0c,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_u32_e64_dpp v5, s[12:13], v1, v2 row_shr:15 +// W64: [0x05,0x0c,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_u32_e64_dpp v5, s[12:13], v1, v2 row_ror:1 +// W64: [0x05,0x0c,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_u32_e64_dpp v5, s[12:13], v1, v2 row_ror:15 +// W64: [0x05,0x0c,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_u32_e64_dpp v5, s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W64: [0x05,0x68,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_u32_e64_dpp v5, vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W64: [0x05,0x6a,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_u32_e64_dpp v5, ttmp[14:15], v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W64: [0x05,0x7a,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_u32_e64_dpp v255, null, v255, v255 clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: [0xff,0xfc,0x00,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] + +v_add_lshl_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x47,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_add_lshl_u32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x47,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_add_lshl_u32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x47,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_add_lshl_u32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX12: [0x05,0x00,0x47,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_add_lshl_u32_e64_dpp v5, 
v1, v2, v3 row_mirror +// GFX12: [0x05,0x00,0x47,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_add_lshl_u32_e64_dpp v5, v1, v2, v255 row_half_mirror +// GFX12: [0x05,0x00,0x47,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] + +v_add_lshl_u32_e64_dpp v5, v1, v2, s105 row_shl:1 +// GFX12: [0x05,0x00,0x47,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] + +v_add_lshl_u32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 +// GFX12: [0x05,0x00,0x47,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] + +v_add_lshl_u32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 +// GFX12: [0x05,0x00,0x47,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] + +v_add_lshl_u32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 +// GFX12: [0x05,0x00,0x47,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] + +v_add_lshl_u32_e64_dpp v5, v1, v2, exec_hi row_ror:1 +// GFX12: [0x05,0x00,0x47,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] + +v_add_lshl_u32_e64_dpp v5, v1, v2, exec_lo row_ror:15 +// GFX12: [0x05,0x00,0x47,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] + +v_add_lshl_u32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: [0x05,0x00,0x47,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] + +v_add_lshl_u32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: [0x05,0x00,0x47,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] + +v_add_lshl_u32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: [0x05,0x00,0x47,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13] + +v_add_lshl_u32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: [0xff,0x00,0x47,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] + +v_add_nc_i16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_add_nc_i16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] +// GFX12: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_add_nc_i16_e64_dpp v5, v1, v2 row_mirror +// GFX12: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_add_nc_i16_e64_dpp v5, v1, v2 row_half_mirror +// GFX12: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_add_nc_i16_e64_dpp v5, v1, v2 row_shl:1 +// GFX12: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_add_nc_i16_e64_dpp v5, v1, v2 row_shl:15 +// GFX12: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_add_nc_i16_e64_dpp v5, v1, v2 row_shr:1 +// GFX12: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_add_nc_i16_e64_dpp v5, v1, v2 row_shr:15 +// GFX12: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_add_nc_i16_e64_dpp v5, v1, v2 row_ror:1 +// GFX12: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_add_nc_i16_e64_dpp v5, v1, v2 row_ror:15 +// GFX12: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_add_nc_i16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_add_nc_i16_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] + +v_add_nc_i16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] + +v_add_nc_i16_e64_dpp v255, v255, v255 clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: [0xff,0x80,0x0d,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] + +v_add_nc_i32_e64_dpp v5, v1, v2 
quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x26,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_add_nc_i32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] +// GFX12: [0x05,0x00,0x26,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_add_nc_i32_e64_dpp v5, v1, v2 row_mirror +// GFX12: [0x05,0x00,0x26,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_add_nc_i32_e64_dpp v5, v1, v2 row_half_mirror +// GFX12: [0x05,0x00,0x26,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_add_nc_i32_e64_dpp v5, v1, v2 row_shl:1 +// GFX12: [0x05,0x00,0x26,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_add_nc_i32_e64_dpp v5, v1, v2 row_shl:15 +// GFX12: [0x05,0x00,0x26,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_add_nc_i32_e64_dpp v5, v1, v2 row_shr:1 +// GFX12: [0x05,0x00,0x26,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_add_nc_i32_e64_dpp v5, v1, v2 row_shr:15 +// GFX12: [0x05,0x00,0x26,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_add_nc_i32_e64_dpp v5, v1, v2 row_ror:1 +// GFX12: [0x05,0x00,0x26,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_add_nc_i32_e64_dpp v5, v1, v2 row_ror:15 +// GFX12: [0x05,0x00,0x26,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_add_nc_i32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: [0x05,0x00,0x26,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_add_nc_i32_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: [0x05,0x00,0x26,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] + +v_add_nc_i32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: [0x05,0x00,0x26,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] + +v_add_nc_i32_e64_dpp v255, v255, v255 clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: [0xff,0x80,0x26,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] + +v_add_nc_u16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_add_nc_u16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] +// GFX12: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_add_nc_u16_e64_dpp v5, v1, v2 row_mirror +// GFX12: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_add_nc_u16_e64_dpp v5, v1, v2 row_half_mirror +// GFX12: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_add_nc_u16_e64_dpp v5, v1, v2 row_shl:1 +// GFX12: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_add_nc_u16_e64_dpp v5, v1, v2 row_shl:15 +// GFX12: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_add_nc_u16_e64_dpp v5, v1, v2 row_shr:1 +// GFX12: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_add_nc_u16_e64_dpp v5, v1, v2 row_shr:15 +// GFX12: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_add_nc_u16_e64_dpp v5, v1, v2 row_ror:1 +// GFX12: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_add_nc_u16_e64_dpp v5, v1, v2 row_ror:15 +// GFX12: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_add_nc_u16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_add_nc_u16_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] + +v_add_nc_u16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] + +v_add_nc_u16_e64_dpp v255, v255, v255 clamp row_xmask:15 
row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: [0xff,0x80,0x03,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] + +v_alignbit_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x16,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_alignbit_b32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x16,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_alignbit_b32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x16,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_alignbit_b32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX12: [0x05,0x00,0x16,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_alignbit_b32_e64_dpp v5, v1, v2, v3 row_mirror +// GFX12: [0x05,0x00,0x16,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_alignbit_b32_e64_dpp v5, v1, v2, v3 row_half_mirror +// GFX12: [0x05,0x00,0x16,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] + +v_alignbit_b32_e64_dpp v5, v1, v2, v255 row_shl:1 +// GFX12: [0x05,0x00,0x16,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x01,0x01,0xff] + +v_alignbit_b32_e64_dpp v5, v1, v2, s105 row_shl:15 +// GFX12: [0x05,0x00,0x16,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x0f,0x01,0xff] + +v_alignbit_b32_e64_dpp v5, v1, v2, vcc_hi row_shr:1 +// GFX12: [0x05,0x00,0x16,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff] + +v_alignbit_b32_e64_dpp v5, v1, v2, vcc_lo row_shr:15 +// GFX12: [0x05,0x00,0x16,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff] + +v_alignbit_b32_e64_dpp v5, v1, v2, ttmp15 row_ror:1 +// GFX12: [0x05,0x00,0x16,0xd6,0xfa,0x04,0xee,0x01,0x01,0x21,0x01,0xff] + +v_alignbit_b32_e64_dpp v5, v1, v2, exec_hi row_ror:15 +// GFX12: [0x05,0x00,0x16,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] + +v_alignbit_b32_e64_dpp v5, v1, v2, exec_lo row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: [0x05,0x00,0x16,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] + +v_alignbit_b32_e64_dpp v5, v1, v2, null row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: [0x05,0x00,0x16,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] + +v_alignbit_b32_e64_dpp v5, v1, v2, -1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: [0x05,0x00,0x16,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x09,0x13] + +v_alignbit_b32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: [0xff,0x00,0x16,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] + +v_alignbyte_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x17,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_alignbyte_b32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x17,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_alignbyte_b32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x17,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_alignbyte_b32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX12: [0x05,0x00,0x17,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_alignbyte_b32_e64_dpp v5, v1, v2, v3 row_mirror +// GFX12: [0x05,0x00,0x17,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_alignbyte_b32_e64_dpp v5, v1, v2, v3 row_half_mirror +// GFX12: [0x05,0x00,0x17,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] + +v_alignbyte_b32_e64_dpp v5, v1, v2, v255 row_shl:1 +// GFX12: [0x05,0x00,0x17,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x01,0x01,0xff] + +v_alignbyte_b32_e64_dpp v5, v1, v2, s105 row_shl:15 +// GFX12: [0x05,0x00,0x17,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x0f,0x01,0xff] + +v_alignbyte_b32_e64_dpp v5, v1, v2, vcc_hi row_shr:1 +// GFX12: [0x05,0x00,0x17,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff] + +v_alignbyte_b32_e64_dpp 
v5, v1, v2, vcc_lo row_shr:15 +// GFX12: [0x05,0x00,0x17,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff] + +v_alignbyte_b32_e64_dpp v5, v1, v2, ttmp15 row_ror:1 +// GFX12: [0x05,0x00,0x17,0xd6,0xfa,0x04,0xee,0x01,0x01,0x21,0x01,0xff] + +v_alignbyte_b32_e64_dpp v5, v1, v2, exec_hi row_ror:15 +// GFX12: [0x05,0x00,0x17,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] + +v_alignbyte_b32_e64_dpp v5, v1, v2, exec_lo row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: [0x05,0x00,0x17,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] + +v_alignbyte_b32_e64_dpp v5, v1, v2, null row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: [0x05,0x00,0x17,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] + +v_alignbyte_b32_e64_dpp v5, v1, v2, -1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: [0x05,0x00,0x17,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x09,0x13] + +v_alignbyte_b32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: [0xff,0x00,0x17,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] + +v_and_b16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_and_b16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] +// GFX12: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_and_b16_e64_dpp v5, v1, v2 row_mirror +// GFX12: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_and_b16_e64_dpp v5, v1, v2 row_half_mirror +// GFX12: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_and_b16_e64_dpp v5, v1, v2 row_shl:1 +// GFX12: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_and_b16_e64_dpp v5, v1, v2 row_shl:15 +// GFX12: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_and_b16_e64_dpp v5, v1, v2 row_shr:1 +// GFX12: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_and_b16_e64_dpp v5, v1, v2 row_shr:15 +// GFX12: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_and_b16_e64_dpp v5, v1, v2 row_ror:1 +// GFX12: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_and_b16_e64_dpp v5, v1, v2 row_ror:15 +// GFX12: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_and_b16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_and_b16_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] + +v_and_b16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] + +v_and_b16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: [0xff,0x00,0x62,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] + +v_and_or_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x57,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_and_or_b32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x57,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_and_or_b32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x57,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_and_or_b32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX12: [0x05,0x00,0x57,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_and_or_b32_e64_dpp v5, v1, v2, v3 row_mirror +// GFX12: [0x05,0x00,0x57,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_and_or_b32_e64_dpp v5, v1, v2, v255 row_half_mirror +// GFX12: 
[0x05,0x00,0x57,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] + +v_and_or_b32_e64_dpp v5, v1, v2, s105 row_shl:1 +// GFX12: [0x05,0x00,0x57,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] + +v_and_or_b32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 +// GFX12: [0x05,0x00,0x57,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] + +v_and_or_b32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 +// GFX12: [0x05,0x00,0x57,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] + +v_and_or_b32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 +// GFX12: [0x05,0x00,0x57,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] + +v_and_or_b32_e64_dpp v5, v1, v2, exec_hi row_ror:1 +// GFX12: [0x05,0x00,0x57,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] + +v_and_or_b32_e64_dpp v5, v1, v2, exec_lo row_ror:15 +// GFX12: [0x05,0x00,0x57,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] + +v_and_or_b32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: [0x05,0x00,0x57,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] + +v_and_or_b32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: [0x05,0x00,0x57,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] + +v_and_or_b32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: [0x05,0x00,0x57,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13] + +v_and_or_b32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: [0xff,0x00,0x57,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] + +v_ashrrev_i16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_ashrrev_i16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] +// GFX12: [0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_ashrrev_i16_e64_dpp v5, v1, v2 row_mirror +// GFX12: [0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_ashrrev_i16_e64_dpp v5, v1, v2 row_half_mirror +// GFX12: [0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_ashrrev_i16_e64_dpp v5, v1, v2 row_shl:1 +// GFX12: [0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_ashrrev_i16_e64_dpp v5, v1, v2 row_shl:15 +// GFX12: [0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_ashrrev_i16_e64_dpp v5, v1, v2 row_shr:1 +// GFX12: [0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_ashrrev_i16_e64_dpp v5, v1, v2 row_shr:15 +// GFX12: [0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_ashrrev_i16_e64_dpp v5, v1, v2 row_ror:1 +// GFX12: [0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_ashrrev_i16_e64_dpp v5, v1, v2 row_ror:15 +// GFX12: [0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_ashrrev_i16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: [0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_ashrrev_i16_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: [0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] + +v_ashrrev_i16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: [0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] + +v_ashrrev_i16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: [0xff,0x00,0x3a,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] + +v_bcnt_u32_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x1e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_bcnt_u32_b32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] +// GFX12: 
[0x05,0x00,0x1e,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_bcnt_u32_b32_e64_dpp v5, v1, v2 row_mirror +// GFX12: [0x05,0x00,0x1e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_bcnt_u32_b32_e64_dpp v5, v1, v2 row_half_mirror +// GFX12: [0x05,0x00,0x1e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_bcnt_u32_b32_e64_dpp v5, v1, v2 row_shl:1 +// GFX12: [0x05,0x00,0x1e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_bcnt_u32_b32_e64_dpp v5, v1, v2 row_shl:15 +// GFX12: [0x05,0x00,0x1e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_bcnt_u32_b32_e64_dpp v5, v1, v2 row_shr:1 +// GFX12: [0x05,0x00,0x1e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_bcnt_u32_b32_e64_dpp v5, v1, v2 row_shr:15 +// GFX12: [0x05,0x00,0x1e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_bcnt_u32_b32_e64_dpp v5, v1, v2 row_ror:1 +// GFX12: [0x05,0x00,0x1e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_bcnt_u32_b32_e64_dpp v5, v1, v2 row_ror:15 +// GFX12: [0x05,0x00,0x1e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_bcnt_u32_b32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: [0x05,0x00,0x1e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_bcnt_u32_b32_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: [0x05,0x00,0x1e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] + +v_bcnt_u32_b32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: [0x05,0x00,0x1e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] + +v_bcnt_u32_b32_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: [0xff,0x00,0x1e,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] + +v_bfe_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x11,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_bfe_i32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x11,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_bfe_i32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x11,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_bfe_i32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX12: [0x05,0x00,0x11,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_bfe_i32_e64_dpp v5, v1, v2, v3 row_mirror +// GFX12: [0x05,0x00,0x11,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_bfe_i32_e64_dpp v5, v1, v2, v255 row_half_mirror +// GFX12: [0x05,0x00,0x11,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] + +v_bfe_i32_e64_dpp v5, v1, v2, s105 row_shl:1 +// GFX12: [0x05,0x00,0x11,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] + +v_bfe_i32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 +// GFX12: [0x05,0x00,0x11,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] + +v_bfe_i32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 +// GFX12: [0x05,0x00,0x11,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] + +v_bfe_i32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 +// GFX12: [0x05,0x00,0x11,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] + +v_bfe_i32_e64_dpp v5, v1, v2, exec_hi row_ror:1 +// GFX12: [0x05,0x00,0x11,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] + +v_bfe_i32_e64_dpp v5, v1, v2, exec_lo row_ror:15 +// GFX12: [0x05,0x00,0x11,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] + +v_bfe_i32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: [0x05,0x00,0x11,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] + +v_bfe_i32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: [0x05,0x00,0x11,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] + +v_bfe_i32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 
bound_ctrl:1 fi:0 +// GFX12: [0x05,0x00,0x11,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13] + +v_bfe_i32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: [0xff,0x00,0x11,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] + +v_bfe_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x10,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_bfe_u32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x10,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_bfe_u32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x10,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_bfe_u32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX12: [0x05,0x00,0x10,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_bfe_u32_e64_dpp v5, v1, v2, v3 row_mirror +// GFX12: [0x05,0x00,0x10,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_bfe_u32_e64_dpp v5, v1, v2, v255 row_half_mirror +// GFX12: [0x05,0x00,0x10,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] + +v_bfe_u32_e64_dpp v5, v1, v2, s105 row_shl:1 +// GFX12: [0x05,0x00,0x10,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] + +v_bfe_u32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 +// GFX12: [0x05,0x00,0x10,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] + +v_bfe_u32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 +// GFX12: [0x05,0x00,0x10,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] + +v_bfe_u32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 +// GFX12: [0x05,0x00,0x10,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] + +v_bfe_u32_e64_dpp v5, v1, v2, exec_hi row_ror:1 +// GFX12: [0x05,0x00,0x10,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] + +v_bfe_u32_e64_dpp v5, v1, v2, exec_lo row_ror:15 +// GFX12: [0x05,0x00,0x10,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] + +v_bfe_u32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: [0x05,0x00,0x10,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] + +v_bfe_u32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: [0x05,0x00,0x10,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] + +v_bfe_u32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: [0x05,0x00,0x10,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13] + +v_bfe_u32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: [0xff,0x00,0x10,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] + +v_bfi_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x12,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_bfi_b32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x12,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_bfi_b32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x12,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_bfi_b32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX12: [0x05,0x00,0x12,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_bfi_b32_e64_dpp v5, v1, v2, v3 row_mirror +// GFX12: [0x05,0x00,0x12,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_bfi_b32_e64_dpp v5, v1, v2, v255 row_half_mirror +// GFX12: [0x05,0x00,0x12,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] + +v_bfi_b32_e64_dpp v5, v1, v2, s105 row_shl:1 +// GFX12: [0x05,0x00,0x12,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] + +v_bfi_b32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 +// GFX12: [0x05,0x00,0x12,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] + +v_bfi_b32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 +// GFX12: [0x05,0x00,0x12,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] + 
+v_bfi_b32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 +// GFX12: [0x05,0x00,0x12,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] + +v_bfi_b32_e64_dpp v5, v1, v2, exec_hi row_ror:1 +// GFX12: [0x05,0x00,0x12,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] + +v_bfi_b32_e64_dpp v5, v1, v2, exec_lo row_ror:15 +// GFX12: [0x05,0x00,0x12,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] + +v_bfi_b32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: [0x05,0x00,0x12,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] + +v_bfi_b32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: [0x05,0x00,0x12,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] + +v_bfi_b32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: [0x05,0x00,0x12,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13] + +v_bfi_b32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: [0xff,0x00,0x12,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] + +v_bfm_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x1d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_bfm_b32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] +// GFX12: [0x05,0x00,0x1d,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_bfm_b32_e64_dpp v5, v1, v2 row_mirror +// GFX12: [0x05,0x00,0x1d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_bfm_b32_e64_dpp v5, v1, v2 row_half_mirror +// GFX12: [0x05,0x00,0x1d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_bfm_b32_e64_dpp v5, v1, v2 row_shl:1 +// GFX12: [0x05,0x00,0x1d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_bfm_b32_e64_dpp v5, v1, v2 row_shl:15 +// GFX12: [0x05,0x00,0x1d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_bfm_b32_e64_dpp v5, v1, v2 row_shr:1 +// GFX12: [0x05,0x00,0x1d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_bfm_b32_e64_dpp v5, v1, v2 row_shr:15 +// GFX12: [0x05,0x00,0x1d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_bfm_b32_e64_dpp v5, v1, v2 row_ror:1 +// GFX12: [0x05,0x00,0x1d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_bfm_b32_e64_dpp v5, v1, v2 row_ror:15 +// GFX12: [0x05,0x00,0x1d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_bfm_b32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: [0x05,0x00,0x1d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_bfm_b32_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: [0x05,0x00,0x1d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] + +v_bfm_b32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: [0x05,0x00,0x1d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] + +v_bfm_b32_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: [0xff,0x00,0x1d,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] + +v_cndmask_b16_e64_dpp v5, v1, v2, s3 quad_perm:[3,2,1,0] +// W32: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b16_e64_dpp v5, v1, v2, s3 quad_perm:[0,1,2,3] +// W32: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x0e,0x00,0x01,0xe4,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b16_e64_dpp v5, v1, v2, s3 row_mirror +// W32: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b16_e64_dpp v5, v1, s2, s3 row_mirror +// W32: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x0c,0x00,0x01,0x40,0x01,0xff] 
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b16_e64_dpp v5, v1, 10, s3 row_mirror +// W32: [0x05,0x00,0x5d,0xd6,0xfa,0x14,0x0d,0x00,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b16_e64_dpp v5, v1, v2, s3 row_half_mirror +// W32: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x41,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b16_e64_dpp v5, v1, v2, s3 row_shl:1 +// W32: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x01,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b16_e64_dpp v5, v1, v2, s3 row_shl:15 +// W32: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x0f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b16_e64_dpp v5, v1, v2, s3 row_shr:1 +// W32: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x11,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b16_e64_dpp v5, v1, v2, s3 row_shr:15 +// W32: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x1f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b16_e64_dpp v5, v1, v2, s3 row_ror:1 +// W32: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x21,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b16_e64_dpp v5, v1, v2, s105 row_ror:15 +// W32: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x2f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b16_e64_dpp v5, v1, v2, vcc_hi row_share:0 row_mask:0xf bank_mask:0xf +// W32: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0xae,0x01,0x01,0x50,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b16_e64_dpp v5, |v1|, -v2, vcc_lo row_share:15 row_mask:0x0 bank_mask:0x1 +// W32: [0x05,0x01,0x5d,0xd6,0xfa,0x04,0xaa,0x41,0x01,0x5f,0x01,0x01] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b16_e64_dpp v5, -v1, |v2|, ttmp15 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W32: [0x05,0x02,0x5d,0xd6,0xfa,0x04,0xee,0x21,0x01,0x60,0x09,0x13] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] quad_perm:[3,2,1,0] +// W64: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] quad_perm:[0,1,2,3] +// W64: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0xe4,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] row_mirror +// W64: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x40,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] row_half_mirror +// W64: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b16_e64_dpp v5, v1, s2, s[6:7] row_half_mirror +// W64: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x18,0x00,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b16_e64_dpp v5, v1, 10, s[6:7] row_half_mirror +// W64: 
[0x05,0x00,0x5d,0xd6,0xfa,0x14,0x19,0x00,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] row_shl:1 +// W64: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x01,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] row_shl:15 +// W64: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x0f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] row_shr:1 +// W64: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x11,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] row_shr:15 +// W64: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x1f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] row_ror:1 +// W64: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x21,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] row_ror:15 +// W64: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x2f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b16_e64_dpp v5, v1, v2, s[104:105] row_share:0 row_mask:0xf bank_mask:0xf +// W64: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0xa2,0x01,0x01,0x50,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b16_e64_dpp v5, |v1|, -v2, vcc row_share:15 row_mask:0x0 bank_mask:0x1 +// W64: [0x05,0x01,0x5d,0xd6,0xfa,0x04,0xaa,0x41,0x01,0x5f,0x01,0x01] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b16_e64_dpp v5, -v1, |v2|, ttmp[14:15] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W64: [0x05,0x02,0x5d,0xd6,0xfa,0x04,0xea,0x21,0x01,0x60,0x09,0x13] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b16_e64_dpp v255, -|v255|, -|v255|, null row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: [0xff,0x03,0x5d,0xd6,0xfa,0xfe,0xf3,0x61,0xff,0x6f,0x05,0x30] + +v_cubeid_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x0c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_cubeid_f32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x0c,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_cubeid_f32_e64_dpp v5, v1, 2.0, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x0c,0xd6,0xfa,0xe8,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_cubeid_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX12: [0x05,0x00,0x0c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_cubeid_f32_e64_dpp v5, v1, v2, v3 row_mirror +// GFX12: [0x05,0x00,0x0c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_cubeid_f32_e64_dpp v5, v1, v2, v255 row_half_mirror +// GFX12: [0x05,0x00,0x0c,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] + +v_cubeid_f32_e64_dpp v5, v1, v2, s105 row_shl:1 +// GFX12: [0x05,0x00,0x0c,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] + +v_cubeid_f32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 +// GFX12: [0x05,0x00,0x0c,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] + +v_cubeid_f32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 +// GFX12: [0x05,0x00,0x0c,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] + +v_cubeid_f32_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15 +// GFX12: [0x05,0x01,0x0c,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] + 
+v_cubeid_f32_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 +// GFX12: [0x05,0x02,0x0c,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] + +v_cubeid_f32_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 +// GFX12: [0x05,0x04,0x0c,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] + +v_cubeid_f32_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: [0x05,0x03,0x0c,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] + +v_cubeid_f32_e64_dpp v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: [0x05,0x05,0x0c,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01] + +v_cubeid_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: [0x05,0x06,0x0c,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x09,0x13] + +v_cubeid_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: [0xff,0x87,0x0c,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x05,0x30] + +v_cubema_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x0f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_cubema_f32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x0f,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_cubema_f32_e64_dpp v5, v1, 2.0, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x0f,0xd6,0xfa,0xe8,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_cubema_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX12: [0x05,0x00,0x0f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_cubema_f32_e64_dpp v5, v1, v2, v3 row_mirror +// GFX12: [0x05,0x00,0x0f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_cubema_f32_e64_dpp v5, v1, v2, v255 row_half_mirror +// GFX12: [0x05,0x00,0x0f,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] + +v_cubema_f32_e64_dpp v5, v1, v2, s105 row_shl:1 +// GFX12: [0x05,0x00,0x0f,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] + +v_cubema_f32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 +// GFX12: [0x05,0x00,0x0f,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] + +v_cubema_f32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 +// GFX12: [0x05,0x00,0x0f,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] + +v_cubema_f32_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15 +// GFX12: [0x05,0x01,0x0f,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] + +v_cubema_f32_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 +// GFX12: [0x05,0x02,0x0f,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] + +v_cubema_f32_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 +// GFX12: [0x05,0x04,0x0f,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] + +v_cubema_f32_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: [0x05,0x03,0x0f,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] + +v_cubema_f32_e64_dpp v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: [0x05,0x05,0x0f,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01] + +v_cubema_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: [0x05,0x06,0x0f,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x09,0x13] + +v_cubema_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: [0xff,0x87,0x0f,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x05,0x30] + +v_cubesc_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x0d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_cubesc_f32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x0d,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_cubesc_f32_e64_dpp v5, v1, 2.0, v3 quad_perm:[3,2,1,0] +// 
GFX12: [0x05,0x00,0x0d,0xd6,0xfa,0xe8,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_cubesc_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX12: [0x05,0x00,0x0d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_cubesc_f32_e64_dpp v5, v1, v2, v3 row_mirror +// GFX12: [0x05,0x00,0x0d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_cubesc_f32_e64_dpp v5, v1, v2, v255 row_half_mirror +// GFX12: [0x05,0x00,0x0d,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] + +v_cubesc_f32_e64_dpp v5, v1, v2, s105 row_shl:1 +// GFX12: [0x05,0x00,0x0d,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] + +v_cubesc_f32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 +// GFX12: [0x05,0x00,0x0d,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] + +v_cubesc_f32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 +// GFX12: [0x05,0x00,0x0d,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] + +v_cubesc_f32_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15 +// GFX12: [0x05,0x01,0x0d,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] + +v_cubesc_f32_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 +// GFX12: [0x05,0x02,0x0d,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] + +v_cubesc_f32_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 +// GFX12: [0x05,0x04,0x0d,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] + +v_cubesc_f32_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: [0x05,0x03,0x0d,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] + +v_cubesc_f32_e64_dpp v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: [0x05,0x05,0x0d,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01] + +v_cubesc_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: [0x05,0x06,0x0d,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x09,0x13] + +v_cubesc_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: [0xff,0x87,0x0d,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x05,0x30] + +v_cubetc_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x0e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_cubetc_f32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x0e,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_cubetc_f32_e64_dpp v5, v1, 2.0, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x0e,0xd6,0xfa,0xe8,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_cubetc_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX12: [0x05,0x00,0x0e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_cubetc_f32_e64_dpp v5, v1, v2, v3 row_mirror +// GFX12: [0x05,0x00,0x0e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_cubetc_f32_e64_dpp v5, v1, v2, v255 row_half_mirror +// GFX12: [0x05,0x00,0x0e,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] + +v_cubetc_f32_e64_dpp v5, v1, v2, s105 row_shl:1 +// GFX12: [0x05,0x00,0x0e,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] + +v_cubetc_f32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 +// GFX12: [0x05,0x00,0x0e,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] + +v_cubetc_f32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 +// GFX12: [0x05,0x00,0x0e,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] + +v_cubetc_f32_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15 +// GFX12: [0x05,0x01,0x0e,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] + +v_cubetc_f32_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 +// GFX12: [0x05,0x02,0x0e,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] + +v_cubetc_f32_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 +// GFX12: [0x05,0x04,0x0e,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] + +v_cubetc_f32_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf 
bank_mask:0xf +// GFX12: [0x05,0x03,0x0e,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] + +v_cubetc_f32_e64_dpp v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: [0x05,0x05,0x0e,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01] + +v_cubetc_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: [0x05,0x06,0x0e,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x09,0x13] + +v_cubetc_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: [0xff,0x87,0x0e,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x05,0x30] + +v_cvt_pk_bf8_f32_e64_dpp v1, -v2, |v3| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd +// GFX12: encoding: [0x01,0x02,0x6a,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0xed] + +v_cvt_pk_bf8_f32_e64_dpp v1, -v2, |v3| quad_perm:[0,1,2,3] +// GFX12: encoding: [0x01,0x02,0x6a,0xd7,0xfa,0x06,0x02,0x20,0x02,0xe4,0x00,0xff] + +v_cvt_pk_bf8_f32_e64_dpp v6, -v2, |v3| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd +// GFX12: encoding: [0x06,0x02,0x6a,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0xed] + +v_cvt_pk_bf8_f32_e64_dpp v1, -v6, |v3| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd +// GFX12: encoding: [0x01,0x02,0x6a,0xd7,0xfa,0x06,0x02,0x20,0x06,0x1b,0x00,0xed] + +v_cvt_pk_bf8_f32_e64_dpp v1, -v2, |v255| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd +// GFX12: encoding: [0x01,0x02,0x6a,0xd7,0xfa,0xfe,0x03,0x20,0x02,0x1b,0x00,0xed] + +v_cvt_pk_bf8_f32_e64_dpp v1, -v2, |v3| quad_perm:[0,2,1,3] row_mask:0xe bank_mask:0xd +// GFX12: encoding: [0x01,0x02,0x6a,0xd7,0xfa,0x06,0x02,0x20,0x02,0xd8,0x00,0xed] + +v_cvt_pk_bf8_f32_e64_dpp v1, -v2, |v3| quad_perm:[3,2,1,0] row_mask:0x2 bank_mask:0xd +// GFX12: encoding: [0x01,0x02,0x6a,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0x2d] + +v_cvt_pk_bf8_f32_e64_dpp v1, -v2, |v3| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0x5 +// GFX12: encoding: [0x01,0x02,0x6a,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0xe5] + +v_cvt_pk_bf8_f32_e64_dpp v1, -v2, |v3| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd fi:1 +// GFX12: encoding: [0x01,0x02,0x6a,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x04,0xed] + +v_cvt_pk_fp8_f32_e64_dpp v1, -v2, |v3| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd +// GFX12: encoding: [0x01,0x02,0x69,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0xed] + +v_cvt_pk_fp8_f32_e64_dpp v1, -v2, |v3| quad_perm:[0,1,2,3] +// GFX12: encoding: [0x01,0x02,0x69,0xd7,0xfa,0x06,0x02,0x20,0x02,0xe4,0x00,0xff] + +v_cvt_pk_fp8_f32_e64_dpp v6, -v2, |v3| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd +// GFX12: encoding: [0x06,0x02,0x69,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0xed] + +v_cvt_pk_fp8_f32_e64_dpp v1, -v6, |v3| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd +// GFX12: encoding: [0x01,0x02,0x69,0xd7,0xfa,0x06,0x02,0x20,0x06,0x1b,0x00,0xed] + +v_cvt_pk_fp8_f32_e64_dpp v1, -v2, |v255| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd +// GFX12: encoding: [0x01,0x02,0x69,0xd7,0xfa,0xfe,0x03,0x20,0x02,0x1b,0x00,0xed] + +v_cvt_pk_fp8_f32_e64_dpp v1, -v2, |v3| quad_perm:[0,2,1,3] row_mask:0xe bank_mask:0xd +// GFX12: encoding: [0x01,0x02,0x69,0xd7,0xfa,0x06,0x02,0x20,0x02,0xd8,0x00,0xed] + +v_cvt_pk_fp8_f32_e64_dpp v1, -v2, |v3| quad_perm:[3,2,1,0] row_mask:0x2 bank_mask:0xd +// GFX12: encoding: [0x01,0x02,0x69,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0x2d] + +v_cvt_pk_fp8_f32_e64_dpp v1, -v2, |v3| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0x5 +// GFX12: encoding: [0x01,0x02,0x69,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0xe5] + +v_cvt_pk_fp8_f32_e64_dpp v1, -v2, 
|v3| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd fi:1 +// GFX12: encoding: [0x01,0x02,0x69,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x04,0xed] + +v_cvt_sr_bf8_f32_e64_dpp v1, -v2, v3 quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd +// GFX12: encoding: [0x01,0x00,0x6c,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0xed] + +v_cvt_sr_bf8_f32_e64_dpp v1, -v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf +// GFX12: encoding: [0x01,0x00,0x6c,0xd7,0xfa,0x06,0x02,0x20,0x02,0xe4,0x00,0xff] + +v_cvt_sr_bf8_f32_e64_dpp v6, -v2, v3 quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd +// GFX12: encoding: [0x06,0x00,0x6c,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0xed] + +v_cvt_sr_bf8_f32_e64_dpp v1, -v6, v3 quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd +// GFX12: encoding: [0x01,0x00,0x6c,0xd7,0xfa,0x06,0x02,0x20,0x06,0x1b,0x00,0xed] + +v_cvt_sr_bf8_f32_e64_dpp v1, -v2, v255 quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd +// GFX12: encoding: [0x01,0x00,0x6c,0xd7,0xfa,0xfe,0x03,0x20,0x02,0x1b,0x00,0xed] + +v_cvt_sr_bf8_f32_e64_dpp v1, -v2, v3 quad_perm:[0,2,1,3] row_mask:0xe bank_mask:0xd +// GFX12: encoding: [0x01,0x00,0x6c,0xd7,0xfa,0x06,0x02,0x20,0x02,0xd8,0x00,0xed] + +v_cvt_sr_bf8_f32_e64_dpp v1, -v2, v3 quad_perm:[3,2,1,0] row_mask:0x2 bank_mask:0xd +// GFX12: encoding: [0x01,0x00,0x6c,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0x2d] + +v_cvt_sr_bf8_f32_e64_dpp v1, -v2, v3 quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0x5 +// GFX12: encoding: [0x01,0x00,0x6c,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0xe5] + +v_cvt_sr_bf8_f32_e64_dpp v1, -v2, v3 quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd fi:1 +// GFX12: encoding: [0x01,0x00,0x6c,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x04,0xed] + +v_cvt_sr_bf8_f32 v1, v2, v3 byte_sel:0 quad_perm:[3,2,1,0] +// GFX12: v_cvt_sr_bf8_f32_e64_dpp v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x00,0x6c,0xd7,0xfa,0x06,0x02,0x00,0x02,0x1b,0x00,0xff] + +v_cvt_sr_bf8_f32 v1, v2, v3 byte_sel:1 quad_perm:[3,2,1,0] +// GFX12: v_cvt_sr_bf8_f32_e64_dpp v1, v2, v3 byte_sel:1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x20,0x6c,0xd7,0xfa,0x06,0x02,0x00,0x02,0x1b,0x00,0xff] + +v_cvt_sr_bf8_f32 v1, v2, v3 byte_sel:2 quad_perm:[3,2,1,0] +// GFX12: v_cvt_sr_bf8_f32_e64_dpp v1, v2, v3 byte_sel:2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x40,0x6c,0xd7,0xfa,0x06,0x02,0x00,0x02,0x1b,0x00,0xff] + +v_cvt_sr_bf8_f32 v1, v2, v3 byte_sel:3 quad_perm:[3,2,1,0] +// GFX12: v_cvt_sr_bf8_f32_e64_dpp v1, v2, v3 byte_sel:3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x60,0x6c,0xd7,0xfa,0x06,0x02,0x00,0x02,0x1b,0x00,0xff] + +v_cvt_sr_fp8_f32_e64_dpp v1, -v2, v3 quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd +// GFX12: encoding: [0x01,0x00,0x6b,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0xed] + +v_cvt_sr_fp8_f32_e64_dpp v1, -v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf +// GFX12: encoding: [0x01,0x00,0x6b,0xd7,0xfa,0x06,0x02,0x20,0x02,0xe4,0x00,0xff] + +v_cvt_sr_fp8_f32_e64_dpp v6, -v2, v3 quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd +// GFX12: encoding: [0x06,0x00,0x6b,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0xed] + +v_cvt_sr_fp8_f32_e64_dpp v1, -v6, v3 quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd +// GFX12: encoding: [0x01,0x00,0x6b,0xd7,0xfa,0x06,0x02,0x20,0x06,0x1b,0x00,0xed] + +v_cvt_sr_fp8_f32_e64_dpp v1, -v2, v255 quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd +// GFX12: encoding: [0x01,0x00,0x6b,0xd7,0xfa,0xfe,0x03,0x20,0x02,0x1b,0x00,0xed] + +v_cvt_sr_fp8_f32_e64_dpp v1, -v2, v3 quad_perm:[0,2,1,3] row_mask:0xe 
bank_mask:0xd +// GFX12: encoding: [0x01,0x00,0x6b,0xd7,0xfa,0x06,0x02,0x20,0x02,0xd8,0x00,0xed] + +v_cvt_sr_fp8_f32_e64_dpp v1, -v2, v3 quad_perm:[3,2,1,0] row_mask:0x2 bank_mask:0xd +// GFX12: encoding: [0x01,0x00,0x6b,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0x2d] + +v_cvt_sr_fp8_f32_e64_dpp v1, -v2, v3 quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0x5 +// GFX12: encoding: [0x01,0x00,0x6b,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0xe5] + +v_cvt_sr_fp8_f32_e64_dpp v1, -v2, v3 quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd fi:1 +// GFX12: encoding: [0x01,0x00,0x6b,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x04,0xed] + +v_cvt_sr_fp8_f32 v1, v2, v3 byte_sel:0 quad_perm:[3,2,1,0] +// GFX12: v_cvt_sr_fp8_f32_e64_dpp v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x00,0x6b,0xd7,0xfa,0x06,0x02,0x00,0x02,0x1b,0x00,0xff] + +v_cvt_sr_fp8_f32 v1, v2, v3 byte_sel:1 quad_perm:[3,2,1,0] +// GFX12: v_cvt_sr_fp8_f32_e64_dpp v1, v2, v3 byte_sel:1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x20,0x6b,0xd7,0xfa,0x06,0x02,0x00,0x02,0x1b,0x00,0xff] + +v_cvt_sr_fp8_f32 v1, v2, v3 byte_sel:2 quad_perm:[3,2,1,0] +// GFX12: v_cvt_sr_fp8_f32_e64_dpp v1, v2, v3 byte_sel:2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x40,0x6b,0xd7,0xfa,0x06,0x02,0x00,0x02,0x1b,0x00,0xff] + +v_cvt_sr_fp8_f32 v1, v2, v3 byte_sel:3 quad_perm:[3,2,1,0] +// GFX12: v_cvt_sr_fp8_f32_e64_dpp v1, v2, v3 byte_sel:3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x60,0x6b,0xd7,0xfa,0x06,0x02,0x00,0x02,0x1b,0x00,0xff] + +v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] +// GFX12: [0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 row_mirror +// GFX12: [0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 row_half_mirror +// GFX12: [0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 row_shl:1 +// GFX12: [0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 row_shl:15 +// GFX12: [0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 row_shr:1 +// GFX12: [0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 row_shr:15 +// GFX12: [0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 row_ror:1 +// GFX12: [0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 row_ror:15 +// GFX12: [0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: [0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_cvt_pk_i16_f32_e64_dpp v5, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: [0x05,0x01,0x06,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] + +v_cvt_pk_i16_f32_e64_dpp v5, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: [0x05,0x02,0x06,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13] + +v_cvt_pk_i16_f32_e64_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: [0xff,0x03,0x06,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30] + +v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] +// GFX12: 
[0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+
+v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3]
+// GFX12: [0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+
+v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 row_mirror
+// GFX12: [0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+
+v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 row_half_mirror
+// GFX12: [0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+
+v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 row_shl:1
+// GFX12: [0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+
+v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 row_shl:15
+// GFX12: [0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+
+v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 row_shr:1
+// GFX12: [0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+
+v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 row_shr:15
+// GFX12: [0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+
+v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 row_ror:1
+// GFX12: [0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+
+v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 row_ror:15
+// GFX12: [0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+
+v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: [0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+
+v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: [0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+
+v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: [0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
+
+v_cvt_pk_i16_i32_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: [0xff,0x00,0x24,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30]
+
+v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+
+v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3]
+// GFX12: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+
+v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_mirror
+// GFX12: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+
+v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_half_mirror
+// GFX12: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+
+v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_shl:1
+// GFX12: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+
+v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_shl:15
+// GFX12: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+
+v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_shr:1
+// GFX12: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+
+v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_shr:15
+// GFX12: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+
+v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_ror:1
+// GFX12: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+
+v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_ror:15
+// GFX12: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+
+v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+
+v_cvt_pk_norm_i16_f16_e64_dpp v5, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: [0x05,0x01,0x12,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+
+v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: [0x05,0x02,0x12,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+
+v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: [0xff,0x03,0x12,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
+
+v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+
+v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3]
+// GFX12: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+
+v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_mirror
+// GFX12: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+
+v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_half_mirror
+// GFX12: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+
+v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_shl:1
+// GFX12: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+
+v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_shl:15
+// GFX12: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+
+v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_shr:1
+// GFX12: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+
+v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_shr:15
+// GFX12: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+
+v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_ror:1
+// GFX12: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+
+v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_ror:15
+// GFX12: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+
+v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+
+v_cvt_pk_norm_u16_f16_e64_dpp v5, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: [0x05,0x01,0x13,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+
+v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: [0x05,0x02,0x13,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+
+v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: [0xff,0x03,0x13,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
+
+v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+
+v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3]
+// GFX12: [0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+
+v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 row_mirror
+// GFX12: [0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+
+v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 row_half_mirror
+// GFX12: [0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+
+v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 row_shl:1
+// GFX12: [0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+
+v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 row_shl:15
+// GFX12: [0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+
+v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 row_shr:1
+// GFX12: [0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+
+v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 row_shr:15
+// GFX12: [0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+
+v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 row_ror:1
+// GFX12: [0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+
+v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 row_ror:15
+// GFX12: [0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+
+v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: [0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+
+v_cvt_pk_u16_f32_e64_dpp v5, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: [0x05,0x01,0x07,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+
+v_cvt_pk_u16_f32_e64_dpp v5, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: [0x05,0x02,0x07,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+
+v_cvt_pk_u16_f32_e64_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: [0xff,0x03,0x07,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
+
+v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+
+v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3]
+// GFX12: [0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+
+v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 row_mirror
+// GFX12: [0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+
+v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 row_half_mirror
+// GFX12: [0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+
+v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 row_shl:1
+// GFX12: [0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+
+v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 row_shl:15
+// GFX12: [0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+
+v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 row_shr:1
+// GFX12: [0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+
+v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 row_shr:15
+// GFX12: [0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+
+v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 row_ror:1
+// GFX12: [0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+
+v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 row_ror:15
+// GFX12: [0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+
+v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: [0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+
+v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: [0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+
+v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: [0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
+
+v_cvt_pk_u16_u32_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: [0xff,0x00,0x23,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30]
+
+v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x26,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
+
+v_cvt_pk_u8_f32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x26,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff]
+
+v_cvt_pk_u8_f32_e64_dpp v5, v1, 2.0, v3 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x26,0xd6,0xfa,0xe8,0x0d,0x04,0x01,0x1b,0x00,0xff]
+
+v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3]
+// GFX12: [0x05,0x00,0x26,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff]
+
+v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, v3 row_mirror
+// GFX12: [0x05,0x00,0x26,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff]
+
+v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, v255 row_half_mirror
+// GFX12: [0x05,0x00,0x26,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff]
+
+v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, s105 row_shl:1
+// GFX12: [0x05,0x00,0x26,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff]
+
+v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, vcc_hi row_shl:15
+// GFX12: [0x05,0x00,0x26,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff]
+
+v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, vcc_lo row_shr:1
+// GFX12: [0x05,0x00,0x26,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff]
+
+v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, ttmp15 row_shr:15
+// GFX12: [0x05,0x00,0x26,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff]
+
+v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, exec_hi row_ror:1
+// GFX12: [0x05,0x00,0x26,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff]
+
+v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, exec_lo row_ror:15
+// GFX12: [0x05,0x00,0x26,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff]
+
+v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: [0x05,0x00,0x26,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff]
+
+v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: [0x05,0x00,0x26,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01]
+
+v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: [0x05,0x00,0x26,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13]
+
+v_cvt_pk_u8_f32_e64_dpp v255, -|v255|, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: [0xff,0x01,0x26,0xd6,0xfa,0xfe,0xf7,0x23,0xff,0x6f,0x05,0x30]
+
+v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+
+v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3]
+// GFX12: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+
+v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_mirror
+// GFX12: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+
+v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_half_mirror
+// GFX12: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+
+v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_shl:1
+// GFX12: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+
+v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_shl:15
+// GFX12: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+
+v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_shr:1
+// GFX12: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+
+v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_shr:15
+// GFX12: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+
+v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_ror:1
+// GFX12: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+
+v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_ror:15
+// GFX12: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+
+v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+
+v_cvt_pk_norm_i16_f16_e64_dpp v5, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: [0x05,0x01,0x12,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+
+v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: [0x05,0x02,0x12,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+
+v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: [0xff,0x03,0x12,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
+
+v_cvt_pk_norm_i16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+
+v_cvt_pk_norm_i16_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3]
+// GFX12: [0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+
+v_cvt_pk_norm_i16_f32_e64_dpp v5, v1, v2 row_mirror
+// GFX12: [0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+
+v_cvt_pk_norm_i16_f32_e64_dpp v5, v1, v2 row_half_mirror
+// GFX12: [0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+
+v_cvt_pk_norm_i16_f32_e64_dpp v5, v1, v2 row_shl:1
+// GFX12: [0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+
+v_cvt_pk_norm_i16_f32_e64_dpp v5, v1, v2 row_shl:15
+// GFX12: [0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+
+v_cvt_pk_norm_i16_f32_e64_dpp v5, v1, v2 row_shr:1
+// GFX12: [0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+
+v_cvt_pk_norm_i16_f32_e64_dpp v5, v1, v2 row_shr:15
+// GFX12: [0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+
+v_cvt_pk_norm_i16_f32_e64_dpp v5, v1, v2 row_ror:1
+// GFX12: [0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+
+v_cvt_pk_norm_i16_f32_e64_dpp v5, v1, v2 row_ror:15
+// GFX12: [0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+
+v_cvt_pk_norm_i16_f32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: [0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+
+v_cvt_pk_norm_i16_f32_e64_dpp v5, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: [0x05,0x01,0x21,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+
+v_cvt_pk_norm_i16_f32_e64_dpp v5, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: [0x05,0x02,0x21,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+
+v_cvt_pk_norm_i16_f32_e64_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: [0xff,0x03,0x21,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
+
+v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+
+v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3]
+// GFX12: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+
+v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_mirror
+// GFX12: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+
+v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_half_mirror
+// GFX12: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+
+v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_shl:1
+// GFX12: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+
+v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_shl:15
+// GFX12: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+
+v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_shr:1
+// GFX12: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+
+v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_shr:15
+// GFX12: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+
+v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_ror:1
+// GFX12: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+
+v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_ror:15
+// GFX12: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+
+v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+
+v_cvt_pk_norm_u16_f16_e64_dpp v5, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: [0x05,0x01,0x13,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+
+v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: [0x05,0x02,0x13,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+
+v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: [0xff,0x03,0x13,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
+
+v_cvt_pk_norm_u16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+
+v_cvt_pk_norm_u16_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3]
+// GFX12: [0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+
+v_cvt_pk_norm_u16_f32_e64_dpp v5, v1, v2 row_mirror
+// GFX12: [0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+
+v_cvt_pk_norm_u16_f32_e64_dpp v5, v1, v2 row_half_mirror
+// GFX12: [0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+
+v_cvt_pk_norm_u16_f32_e64_dpp v5, v1, v2 row_shl:1
+// GFX12: [0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+
+v_cvt_pk_norm_u16_f32_e64_dpp v5, v1, v2 row_shl:15
+// GFX12: [0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+
+v_cvt_pk_norm_u16_f32_e64_dpp v5, v1, v2 row_shr:1
+// GFX12: [0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+
+v_cvt_pk_norm_u16_f32_e64_dpp v5, v1, v2 row_shr:15
+// GFX12: [0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+
+v_cvt_pk_norm_u16_f32_e64_dpp v5, v1, v2 row_ror:1
+// GFX12: [0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+
+v_cvt_pk_norm_u16_f32_e64_dpp v5, v1, v2 row_ror:15
+// GFX12: [0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+
+v_cvt_pk_norm_u16_f32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: [0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+
+v_cvt_pk_norm_u16_f32_e64_dpp v5, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: [0x05,0x01,0x22,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+
+v_cvt_pk_norm_u16_f32_e64_dpp v5, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: [0x05,0x02,0x22,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+
+v_cvt_pk_norm_u16_f32_e64_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: [0xff,0x03,0x22,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
+
+v_div_fixup_f16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x54,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
+
+v_div_fixup_f16_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x54,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff]
+
+v_div_fixup_f16_e64_dpp v5, v1, 2.0, v3 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x54,0xd6,0xfa,0xe8,0x0d,0x04,0x01,0x1b,0x00,0xff]
+
+v_div_fixup_f16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3]
+// GFX12: [0x05,0x00,0x54,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff]
+
+v_div_fixup_f16_e64_dpp v5, v1, v2, v3 row_mirror
+// GFX12: [0x05,0x00,0x54,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff]
+
+v_div_fixup_f16_e64_dpp v5, v1, v2, v255 row_half_mirror
+// GFX12: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff]
+
+v_div_fixup_f16_e64_dpp v5, v1, v2, s105 row_shl:1
+// GFX12: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff]
+
+v_div_fixup_f16_e64_dpp v5, v1, v2, vcc_hi row_shl:15
+// GFX12: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff]
+
+v_div_fixup_f16_e64_dpp v5, v1, v2, vcc_lo row_shr:1
+// GFX12: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff]
+
+v_div_fixup_f16_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15
+// GFX12: [0x05,0x01,0x54,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff]
+
+v_div_fixup_f16_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1
+// GFX12: [0x05,0x02,0x54,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff]
+
+v_div_fixup_f16_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15
+// GFX12: [0x05,0x04,0x54,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff]
+
+v_div_fixup_f16_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: [0x05,0x03,0x54,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff]
+
+v_div_fixup_f16_e64_dpp v5, -|v1|, v2, -|-1| row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: [0x05,0x05,0x54,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01]
+
+v_div_fixup_f16_e64_dpp v5, v1, -|v2|, -|0.5| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: [0x05,0x06,0x54,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x09,0x13]
+
+v_div_fixup_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: [0xff,0x87,0x54,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x05,0x30]
+
+v_fma_f16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x48,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
+
+v_fma_f16_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x48,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff]
+
+v_fma_f16_e64_dpp v5, v1, 2.0, v3 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x48,0xd6,0xfa,0xe8,0x0d,0x04,0x01,0x1b,0x00,0xff]
+
+v_fma_f16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3]
+// GFX12: [0x05,0x00,0x48,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff]
+
+v_fma_f16_e64_dpp v5, v1, v2, v3 row_mirror
+// GFX12: [0x05,0x00,0x48,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff]
+
+v_fma_f16_e64_dpp v5, v1, v2, v255 row_half_mirror
+// GFX12: [0x05,0x00,0x48,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff]
+
+v_fma_f16_e64_dpp v5, v1, v2, s105 row_shl:1
+// GFX12: [0x05,0x00,0x48,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff]
+
+v_fma_f16_e64_dpp v5, v1, v2, vcc_hi row_shl:15
+// GFX12: [0x05,0x00,0x48,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff]
+
+v_fma_f16_e64_dpp v5, v1, v2, vcc_lo row_shr:1
+// GFX12: [0x05,0x00,0x48,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff]
+
+v_fma_f16_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15
+// GFX12: [0x05,0x01,0x48,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff]
+
+v_fma_f16_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1
+// GFX12: [0x05,0x02,0x48,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff]
+
+v_fma_f16_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15
+// GFX12: [0x05,0x04,0x48,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff]
+
+v_fma_f16_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: [0x05,0x03,0x48,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff]
+
+v_fma_f16_e64_dpp v5, -|v1|, v2, -|-1| row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: [0x05,0x05,0x48,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01]
+
+v_fma_f16_e64_dpp v5, v1, -|v2|, -|0.5| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: [0x05,0x06,0x48,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x09,0x13]
+
+v_fma_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: [0xff,0x87,0x48,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x05,0x30]
+
+v_fma_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x13,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
+
+v_fma_f32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x13,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff]
+
+v_fma_f32_e64_dpp v5, v1, 2.0, v3 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x13,0xd6,0xfa,0xe8,0x0d,0x04,0x01,0x1b,0x00,0xff]
+
+v_fma_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3]
+// GFX12: [0x05,0x00,0x13,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff]
+
+v_fma_f32_e64_dpp v5, v1, v2, v3 row_mirror
+// GFX12: [0x05,0x00,0x13,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff]
+
+v_fma_f32_e64_dpp v5, v1, v2, v255 row_half_mirror
+// GFX12: [0x05,0x00,0x13,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff]
+
+v_fma_f32_e64_dpp v5, v1, v2, s105 row_shl:1
+// GFX12: [0x05,0x00,0x13,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff]
+
+v_fma_f32_e64_dpp v5, v1, v2, vcc_hi row_shl:15
+// GFX12: [0x05,0x00,0x13,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff]
+
+v_fma_f32_e64_dpp v5, v1, v2, vcc_lo row_shr:1
+// GFX12: [0x05,0x00,0x13,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff]
+
+v_fma_f32_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15
+// GFX12: [0x05,0x01,0x13,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff]
+
+v_fma_f32_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1
+// GFX12: [0x05,0x02,0x13,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff]
+
+v_fma_f32_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15
+// GFX12: [0x05,0x04,0x13,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff]
+
+v_fma_f32_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: [0x05,0x03,0x13,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff]
+
+v_fma_f32_e64_dpp v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: [0x05,0x05,0x13,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01]
+
+v_fma_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: [0x05,0x06,0x13,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x09,0x13]
+
+v_fma_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: [0xff,0x87,0x13,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x05,0x30]
+
+v_ldexp_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x1c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+
+v_ldexp_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3]
+// GFX12: [0x05,0x00,0x1c,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+
+v_ldexp_f32_e64_dpp v5, v1, v2 row_mirror
+// GFX12: [0x05,0x00,0x1c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+
+v_ldexp_f32_e64_dpp v5, v1, v2 row_half_mirror
+// GFX12: [0x05,0x00,0x1c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+
+v_ldexp_f32_e64_dpp v5, v1, v2 row_shl:1
+// GFX12: [0x05,0x00,0x1c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+
+v_ldexp_f32_e64_dpp v5, v1, v2 row_shl:15
+// GFX12: [0x05,0x00,0x1c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+
+v_ldexp_f32_e64_dpp v5, v1, v2 row_shr:1
+// GFX12: [0x05,0x00,0x1c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+
+v_ldexp_f32_e64_dpp v5, v1, v2 row_shr:15
+// GFX12: [0x05,0x00,0x1c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+
+v_ldexp_f32_e64_dpp v5, v1, v2 row_ror:1
+// GFX12: [0x05,0x00,0x1c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+
+v_ldexp_f32_e64_dpp v5, v1, v2 row_ror:15
+// GFX12: [0x05,0x00,0x1c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+
+v_ldexp_f32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: [0x05,0x00,0x1c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+
+v_ldexp_f32_e64_dpp v5, v1, v2 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: [0x05,0x00,0x1c,0xd7,0xfa,0x04,0x02,0x08,0x01,0x5f,0x01,0x01]
+
+v_ldexp_f32_e64_dpp v5, v1, v2 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: [0x05,0x00,0x1c,0xd7,0xfa,0x04,0x02,0x10,0x01,0x60,0x09,0x13]
+
+v_ldexp_f32_e64_dpp v255, -|v255|, v255 clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: [0xff,0x81,0x1c,0xd7,0xfa,0xfe,0x03,0x38,0xff,0x6f,0x05,0x30]
+
+v_lerp_u8_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x15,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
+
+v_lerp_u8_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x15,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff]
+
+v_lerp_u8_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3]
+// GFX12: [0x05,0x00,0x15,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff]
+
+v_lerp_u8_e64_dpp v5, v1, v2, v3 row_mirror
+// GFX12: [0x05,0x00,0x15,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff]
+
+v_lerp_u8_e64_dpp v5, v1, v2, v255 row_half_mirror
+// GFX12: [0x05,0x00,0x15,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff]
+
+v_lerp_u8_e64_dpp v5, v1, v2, s105 row_shl:1
+// GFX12: [0x05,0x00,0x15,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff]
+
+v_lerp_u8_e64_dpp v5, v1, v2, vcc_hi row_shl:15
+// GFX12: [0x05,0x00,0x15,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff]
+
+v_lerp_u8_e64_dpp v5, v1, v2, vcc_lo row_shr:1
+// GFX12: [0x05,0x00,0x15,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff]
+
+v_lerp_u8_e64_dpp v5, v1, v2, ttmp15 row_shr:15
+// GFX12: [0x05,0x00,0x15,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff]
+
+v_lerp_u8_e64_dpp v5, v1, v2, exec_hi row_ror:1
+// GFX12: [0x05,0x00,0x15,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff]
+
+v_lerp_u8_e64_dpp v5, v1, v2, exec_lo row_ror:15
+// GFX12: [0x05,0x00,0x15,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff]
+
+v_lerp_u8_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: [0x05,0x00,0x15,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff]
+
+v_lerp_u8_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: [0x05,0x00,0x15,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01]
+
+v_lerp_u8_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: [0x05,0x00,0x15,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13]
+
+v_lerp_u8_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: [0xff,0x00,0x15,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30]
+
+v_lshl_add_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x46,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
+
+v_lshl_add_u32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x46,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff]
+
+v_lshl_add_u32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x46,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff]
+
+v_lshl_add_u32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3]
+// GFX12: [0x05,0x00,0x46,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff]
+
+v_lshl_add_u32_e64_dpp v5, v1, v2, v3 row_mirror
+// GFX12: [0x05,0x00,0x46,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff]
+
+v_lshl_add_u32_e64_dpp v5, v1, v2, v255 row_half_mirror
+// GFX12: [0x05,0x00,0x46,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff]
+
+v_lshl_add_u32_e64_dpp v5, v1, v2, s105 row_shl:1
+// GFX12: [0x05,0x00,0x46,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff]
+
+v_lshl_add_u32_e64_dpp v5, v1, v2, vcc_hi row_shl:15
+// GFX12: [0x05,0x00,0x46,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff]
+
+v_lshl_add_u32_e64_dpp v5, v1, v2, vcc_lo row_shr:1
+// GFX12: [0x05,0x00,0x46,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff]
+
+v_lshl_add_u32_e64_dpp v5, v1, v2, ttmp15 row_shr:15
+// GFX12: [0x05,0x00,0x46,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff]
+
+v_lshl_add_u32_e64_dpp v5, v1, v2, exec_hi row_ror:1
+// GFX12: [0x05,0x00,0x46,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff]
+
+v_lshl_add_u32_e64_dpp v5, v1, v2, exec_lo row_ror:15
+// GFX12: [0x05,0x00,0x46,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff]
+
+v_lshl_add_u32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: [0x05,0x00,0x46,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff]
+
+v_lshl_add_u32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: [0x05,0x00,0x46,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01]
+
+v_lshl_add_u32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: [0x05,0x00,0x46,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13]
+
+v_lshl_add_u32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: [0xff,0x00,0x46,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30]
+
+v_lshl_or_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x56,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
+
+v_lshl_or_b32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x56,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff]
+
+v_lshl_or_b32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x56,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff]
+
+v_lshl_or_b32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3]
+// GFX12: [0x05,0x00,0x56,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff]
+
+v_lshl_or_b32_e64_dpp v5, v1, v2, v3 row_mirror
+// GFX12: [0x05,0x00,0x56,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff]
+
+v_lshl_or_b32_e64_dpp v5, v1, v2, v255 row_half_mirror
+// GFX12: [0x05,0x00,0x56,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff]
+
+v_lshl_or_b32_e64_dpp v5, v1, v2, s105 row_shl:1
+// GFX12: [0x05,0x00,0x56,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff]
+
+v_lshl_or_b32_e64_dpp v5, v1, v2, vcc_hi row_shl:15
+// GFX12: [0x05,0x00,0x56,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff]
+
+v_lshl_or_b32_e64_dpp v5, v1, v2, vcc_lo row_shr:1
+// GFX12: [0x05,0x00,0x56,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff]
+
+v_lshl_or_b32_e64_dpp v5, v1, v2, ttmp15 row_shr:15
+// GFX12: [0x05,0x00,0x56,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff]
+
+v_lshl_or_b32_e64_dpp v5, v1, v2, exec_hi row_ror:1
+// GFX12: [0x05,0x00,0x56,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff]
+
+v_lshl_or_b32_e64_dpp v5, v1, v2, exec_lo row_ror:15
+// GFX12: [0x05,0x00,0x56,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff]
+
+v_lshl_or_b32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: [0x05,0x00,0x56,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff]
+
+v_lshl_or_b32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: [0x05,0x00,0x56,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01]
+
+v_lshl_or_b32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: [0x05,0x00,0x56,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13]
+
+v_lshl_or_b32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: [0xff,0x00,0x56,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30]
+
+v_lshlrev_b16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+
+v_lshlrev_b16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3]
+// GFX12: [0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+
+v_lshlrev_b16_e64_dpp v5, v1, v2 row_mirror
+// GFX12: [0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+
+v_lshlrev_b16_e64_dpp v5, v1, v2 row_half_mirror
+// GFX12: [0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+
+v_lshlrev_b16_e64_dpp v5, v1, v2 row_shl:1
+// GFX12: [0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+
+v_lshlrev_b16_e64_dpp v5, v1, v2 row_shl:15
+// GFX12: [0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+
+v_lshlrev_b16_e64_dpp v5, v1, v2 row_shr:1
+// GFX12: [0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+
+v_lshlrev_b16_e64_dpp v5, v1, v2 row_shr:15
+// GFX12: [0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+
+v_lshlrev_b16_e64_dpp v5, v1, v2 row_ror:1
+// GFX12: [0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+
+v_lshlrev_b16_e64_dpp v5, v1, v2 row_ror:15
+// GFX12: [0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+
+v_lshlrev_b16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: [0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+
+v_lshlrev_b16_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: [0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+
+v_lshlrev_b16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: [0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
+
+v_lshlrev_b16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: [0xff,0x00,0x38,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30]
+
+v_lshrrev_b16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+
+v_lshrrev_b16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3]
+// GFX12: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+
+v_lshrrev_b16_e64_dpp v5, v1, v2 row_mirror
+// GFX12: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+
+v_lshrrev_b16_e64_dpp v5, v1, v2 row_half_mirror
+// GFX12: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+
+v_lshrrev_b16_e64_dpp v5, v1, v2 row_shl:1
+// GFX12: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+
+v_lshrrev_b16_e64_dpp v5, v1, v2 row_shl:15
+// GFX12: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+
+v_lshrrev_b16_e64_dpp v5, v1, v2 row_shr:1
+// GFX12: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+
+v_lshrrev_b16_e64_dpp v5, v1, v2 row_shr:15
+// GFX12: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+
+v_lshrrev_b16_e64_dpp v5, v1, v2 row_ror:1
+// GFX12: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+
+v_lshrrev_b16_e64_dpp v5, v1, v2 row_ror:15
+// GFX12: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+
+v_lshrrev_b16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+
+v_lshrrev_b16_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+
+v_lshrrev_b16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
+
+v_lshrrev_b16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: [0xff,0x00,0x39,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30]
+
+v_mad_i16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x53,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
+
+v_mad_i16_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x53,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff]
+
+v_mad_i16_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x53,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff]
+
+v_mad_i16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3]
+// GFX12: [0x05,0x00,0x53,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff]
+
+v_mad_i16_e64_dpp v5, v1, v2, v3 row_mirror
+// GFX12: [0x05,0x00,0x53,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff]
+
+v_mad_i16_e64_dpp v5, v1, v2, v3 row_half_mirror
+// GFX12: [0x05,0x00,0x53,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff]
+
+v_mad_i16_e64_dpp v5, v1, v2, v255 row_shl:1
+// GFX12: [0x05,0x00,0x53,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x01,0x01,0xff]
+
+v_mad_i16_e64_dpp v5, v1, v2, s105 row_shl:15
+// GFX12: [0x05,0x00,0x53,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x0f,0x01,0xff]
+
+v_mad_i16_e64_dpp v5, v1, v2, vcc_hi row_shr:1
+// GFX12: [0x05,0x00,0x53,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff]
+
+v_mad_i16_e64_dpp v5, v1, v2, vcc_lo row_shr:15
+// GFX12: [0x05,0x00,0x53,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff]
+
+v_mad_i16_e64_dpp v5, v1, v2, ttmp15 row_ror:1
+// GFX12: [0x05,0x00,0x53,0xd6,0xfa,0x04,0xee,0x01,0x01,0x21,0x01,0xff]
+
+v_mad_i16_e64_dpp v5, v1, v2, exec_hi row_ror:15
+// GFX12: [0x05,0x00,0x53,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff]
+
+v_mad_i16_e64_dpp v5, v1, v2, exec_lo row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: [0x05,0x00,0x53,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff]
+
+v_mad_i16_e64_dpp v5, v1, v2, null row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: [0x05,0x00,0x53,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01]
+
+v_mad_i16_e64_dpp v5, v1, v2, -1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: [0x05,0x00,0x53,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x09,0x13]
+
+v_mad_i16_e64_dpp v255, v255, v255, src_scc clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: [0xff,0x80,0x53,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30]
+
+v_mad_i32_i16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
+
+v_mad_i32_i16_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff]
+
+v_mad_i32_i16_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x5a,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff]
+
+v_mad_i32_i16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3]
+// GFX12: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff]
+
+v_mad_i32_i16_e64_dpp v5, v1, v2, v3 row_mirror
+// GFX12: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff]
+
+v_mad_i32_i16_e64_dpp v5, v1, v2, v255 row_half_mirror
+// GFX12: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff]
+
+v_mad_i32_i16_e64_dpp v5, v1, v2, s105 row_shl:1
+// GFX12: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff]
+
+v_mad_i32_i16_e64_dpp v5, v1, v2, vcc_hi row_shl:15
+// GFX12: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff]
+
+v_mad_i32_i16_e64_dpp v5, v1, v2, vcc_lo row_shr:1
+// GFX12: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff]
+
+v_mad_i32_i16_e64_dpp v5, v1, v2, ttmp15 row_shr:15
+// GFX12: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff]
+
+v_mad_i32_i16_e64_dpp v5, v1, v2, exec_hi row_ror:1
+// GFX12: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff]
+
+v_mad_i32_i16_e64_dpp v5, v1, v2, exec_lo row_ror:15
+// GFX12: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff]
+
+v_mad_i32_i16_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff]
+
+v_mad_i32_i16_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01]
+
+v_mad_i32_i16_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13]
+
+v_mad_i32_i16_e64_dpp v255, v255, v255, src_scc clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: [0xff,0x80,0x5a,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30]
+
+v_mad_i32_i24_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x0a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
+
+v_mad_i32_i24_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x0a,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff]
+
+v_mad_i32_i24_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x0a,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff]
+
+v_mad_i32_i24_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3]
+// GFX12: [0x05,0x00,0x0a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff]
+
+v_mad_i32_i24_e64_dpp v5, v1, v2, v3 row_mirror
+// GFX12: [0x05,0x00,0x0a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff]
+
+v_mad_i32_i24_e64_dpp v5, v1, v2, v255 row_half_mirror
+// GFX12: [0x05,0x00,0x0a,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff]
+
+v_mad_i32_i24_e64_dpp v5, v1, v2, s105 row_shl:1
+// GFX12: [0x05,0x00,0x0a,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff]
+
+v_mad_i32_i24_e64_dpp v5, v1, v2, vcc_hi row_shl:15
+// GFX12: [0x05,0x00,0x0a,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff]
+
+v_mad_i32_i24_e64_dpp v5, v1, v2, vcc_lo row_shr:1
+// GFX12: [0x05,0x00,0x0a,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff]
+
+v_mad_i32_i24_e64_dpp v5, v1, v2, ttmp15 row_shr:15
+// GFX12: [0x05,0x00,0x0a,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff]
+
+v_mad_i32_i24_e64_dpp v5, v1, v2, exec_hi row_ror:1
+// GFX12: [0x05,0x00,0x0a,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff]
+
+v_mad_i32_i24_e64_dpp v5, v1, v2, exec_lo row_ror:15
+// GFX12: [0x05,0x00,0x0a,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff]
+
+v_mad_i32_i24_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: [0x05,0x00,0x0a,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff]
+
+v_mad_i32_i24_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: [0x05,0x00,0x0a,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01]
+
+v_mad_i32_i24_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: [0x05,0x00,0x0a,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13]
+
+v_mad_i32_i24_e64_dpp v255, v255, v255, src_scc clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: [0xff,0x80,0x0a,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30]
+
+v_mad_u16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x41,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
+
+v_mad_u16_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x41,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff]
+
+v_mad_u16_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x41,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff]
+
+v_mad_u16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3]
+// GFX12: [0x05,0x00,0x41,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff]
+
+v_mad_u16_e64_dpp v5, v1, v2, v3 row_mirror
+// GFX12: [0x05,0x00,0x41,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff]
+
+v_mad_u16_e64_dpp v5, v1, v2, v3 row_half_mirror
+// GFX12: [0x05,0x00,0x41,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff]
+
+v_mad_u16_e64_dpp v5, v1, v2, v255 row_shl:1
+// GFX12: [0x05,0x00,0x41,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x01,0x01,0xff]
+
+v_mad_u16_e64_dpp v5, v1, v2, s105 row_shl:15
+// GFX12: [0x05,0x00,0x41,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x0f,0x01,0xff]
+
+v_mad_u16_e64_dpp v5, v1, v2, vcc_hi row_shr:1
+// GFX12: [0x05,0x00,0x41,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff]
+
+v_mad_u16_e64_dpp v5, v1, v2, vcc_lo row_shr:15
+// GFX12: [0x05,0x00,0x41,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff]
+
+v_mad_u16_e64_dpp v5, v1, v2, ttmp15 row_ror:1
+// GFX12: [0x05,0x00,0x41,0xd6,0xfa,0x04,0xee,0x01,0x01,0x21,0x01,0xff]
+
+v_mad_u16_e64_dpp v5, v1, v2, exec_hi row_ror:15
+// GFX12: [0x05,0x00,0x41,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff]
+
+v_mad_u16_e64_dpp v5, v1, v2, exec_lo row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: [0x05,0x00,0x41,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff]
+
+v_mad_u16_e64_dpp v5, v1, v2, null row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: [0x05,0x00,0x41,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01]
+
+v_mad_u16_e64_dpp v5, v1, v2, -1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: [0x05,0x00,0x41,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x09,0x13]
+
+v_mad_u16_e64_dpp v255, v255, v255, src_scc clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: [0xff,0x80,0x41,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30]
+
+v_mad_u32_u16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x59,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
+
+v_mad_u32_u16_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x59,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff]
+
+v_mad_u32_u16_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x59,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff]
+
+v_mad_u32_u16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3]
+// GFX12: [0x05,0x00,0x59,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff]
+
+v_mad_u32_u16_e64_dpp v5, v1, v2, v3 row_mirror
+// GFX12: [0x05,0x00,0x59,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff]
+
+v_mad_u32_u16_e64_dpp v5, v1, v2, v255 row_half_mirror
+// GFX12: [0x05,0x00,0x59,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff]
+
+v_mad_u32_u16_e64_dpp v5, v1, v2, s105 row_shl:1
+// GFX12: [0x05,0x00,0x59,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff]
+
+v_mad_u32_u16_e64_dpp v5, v1, v2, vcc_hi row_shl:15
+// GFX12: [0x05,0x00,0x59,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff]
+
+v_mad_u32_u16_e64_dpp v5, v1, v2, vcc_lo row_shr:1
+// GFX12: [0x05,0x00,0x59,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff]
+
+v_mad_u32_u16_e64_dpp v5, v1, v2, ttmp15 row_shr:15
+// GFX12: [0x05,0x00,0x59,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff]
+
+v_mad_u32_u16_e64_dpp v5, v1, v2, exec_hi row_ror:1
+// GFX12: [0x05,0x00,0x59,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff]
+
+v_mad_u32_u16_e64_dpp v5, v1, v2, exec_lo row_ror:15
+// GFX12: [0x05,0x00,0x59,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff]
+
+v_mad_u32_u16_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: [0x05,0x00,0x59,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff]
+
+v_mad_u32_u16_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: [0x05,0x00,0x59,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01]
+
+v_mad_u32_u16_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: [0x05,0x00,0x59,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13]
+
+v_mad_u32_u16_e64_dpp v255, v255, v255, src_scc clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: [0xff,0x80,0x59,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30]
+
+v_mad_u32_u24_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x0b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
+
+v_mad_u32_u24_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x0b,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff]
+
+v_mad_u32_u24_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x0b,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff]
+
+v_mad_u32_u24_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3]
+// GFX12: [0x05,0x00,0x0b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff]
+
+v_mad_u32_u24_e64_dpp v5, v1, v2, v3 row_mirror
+// GFX12: [0x05,0x00,0x0b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff]
+
+v_mad_u32_u24_e64_dpp v5, v1, v2, v255 row_half_mirror
+// GFX12: [0x05,0x00,0x0b,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff]
+
+v_mad_u32_u24_e64_dpp v5, v1, v2, s105 row_shl:1
+// GFX12: [0x05,0x00,0x0b,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff]
+
+v_mad_u32_u24_e64_dpp v5, v1, v2, vcc_hi row_shl:15
+// GFX12: [0x05,0x00,0x0b,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff]
+
+v_mad_u32_u24_e64_dpp v5, v1, v2, vcc_lo row_shr:1
+// GFX12: [0x05,0x00,0x0b,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff]
+
+v_mad_u32_u24_e64_dpp v5, v1, v2, ttmp15 row_shr:15
+// GFX12: [0x05,0x00,0x0b,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff]
+
+v_mad_u32_u24_e64_dpp v5, v1, v2, exec_hi row_ror:1
+// GFX12: [0x05,0x00,0x0b,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff]
+
+v_mad_u32_u24_e64_dpp v5, v1, v2, exec_lo row_ror:15
+// GFX12: [0x05,0x00,0x0b,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff]
+
+v_mad_u32_u24_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: [0x05,0x00,0x0b,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff]
+
+v_mad_u32_u24_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: [0x05,0x00,0x0b,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01]
+
+v_mad_u32_u24_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: [0x05,0x00,0x0b,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13]
+
+v_mad_u32_u24_e64_dpp v255, v255, v255, src_scc clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: [0xff,0x80,0x0b,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30]
+
+v_max3_num_f16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x2c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
+
+v_max3_num_f16_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x2c,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff]
+
+v_max3_num_f16_e64_dpp v5, v1, 2.0, v3 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x2c,0xd6,0xfa,0xe8,0x0d,0x04,0x01,0x1b,0x00,0xff]
+
+v_max3_num_f16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3]
+// GFX12: [0x05,0x00,0x2c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff]
+
+v_max3_num_f16_e64_dpp v5, v1, v2, v3 row_mirror
+// GFX12: [0x05,0x00,0x2c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff]
+
+v_max3_num_f16_e64_dpp v5, v1, v2, v255 row_half_mirror
+// GFX12: [0x05,0x00,0x2c,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff]
+
+v_max3_num_f16_e64_dpp v5, v1, v2, s105 row_shl:1
+// GFX12: [0x05,0x00,0x2c,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff]
+
+v_max3_num_f16_e64_dpp v5, v1, v2, vcc_hi row_shl:15
+// GFX12: [0x05,0x00,0x2c,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff]
+
+v_max3_num_f16_e64_dpp v5, v1, v2, vcc_lo row_shr:1
+// GFX12: [0x05,0x00,0x2c,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff]
+
+v_max3_num_f16_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15
+// GFX12: [0x05,0x01,0x2c,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff]
+
+v_max3_num_f16_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1
+// GFX12: [0x05,0x02,0x2c,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff]
+
+v_max3_num_f16_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15
+// GFX12: [0x05,0x04,0x2c,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff]
+
+v_max3_num_f16_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: [0x05,0x03,0x2c,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff]
+
+v_max3_num_f16_e64_dpp v5, -|v1|, v2, -|-1| row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: [0x05,0x05,0x2c,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01]
+
+v_max3_num_f16_e64_dpp v5, v1, -|v2|, -|0.5| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: [0x05,0x06,0x2c,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x09,0x13]
+
+v_max3_num_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: [0xff,0x87,0x2c,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x05,0x30]
+
+v_max3_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x2a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
+
+v_max3_num_f32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x2a,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff]
+
+v_max3_num_f32_e64_dpp v5, v1, 2.0, v3 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x2a,0xd6,0xfa,0xe8,0x0d,0x04,0x01,0x1b,0x00,0xff]
+
+v_max3_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3]
+// GFX12: [0x05,0x00,0x2a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff]
+
+v_max3_num_f32_e64_dpp v5, v1, v2, v3 row_mirror
+// GFX12: [0x05,0x00,0x2a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff]
+
+v_max3_num_f32_e64_dpp v5, v1, v2, v255 row_half_mirror
+// GFX12: [0x05,0x00,0x2a,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff]
+
+v_max3_num_f32_e64_dpp v5, v1, v2, s105 row_shl:1
+// GFX12: [0x05,0x00,0x2a,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff]
+
+v_max3_num_f32_e64_dpp v5, v1, v2, vcc_hi row_shl:15
+// GFX12: [0x05,0x00,0x2a,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff]
+
+v_max3_num_f32_e64_dpp v5, v1, v2, vcc_lo row_shr:1
+// GFX12: [0x05,0x00,0x2a,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff]
+
+v_max3_num_f32_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15
+// GFX12: [0x05,0x01,0x2a,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff]
+
+v_max3_num_f32_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1
+// GFX12: [0x05,0x02,0x2a,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff]
+
+v_max3_num_f32_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15
+// GFX12: [0x05,0x04,0x2a,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff]
+
+v_max3_num_f32_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: [0x05,0x03,0x2a,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff]
+
+v_max3_num_f32_e64_dpp v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: [0x05,0x05,0x2a,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01]
+
+v_max3_num_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: [0x05,0x06,0x2a,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x09,0x13]
+
+v_max3_num_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: [0xff,0x87,0x2a,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x05,0x30]
+
+v_max3_i16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
+
+v_max3_i16_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff]
+
+v_max3_i16_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x4d,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff]
+
+v_max3_i16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3]
+// GFX12: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff]
+
+v_max3_i16_e64_dpp v5, v1, v2, v3 row_mirror
+// GFX12: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff]
+
+v_max3_i16_e64_dpp v5, v1, v2, v3 row_half_mirror
+// GFX12: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff]
+
+v_max3_i16_e64_dpp v5, v1, v2, v255 row_shl:1
+// GFX12: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x01,0x01,0xff]
+
+v_max3_i16_e64_dpp v5, v1, v2, s105 row_shl:15
+// GFX12: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x0f,0x01,0xff]
+
+v_max3_i16_e64_dpp v5, v1, v2, vcc_hi row_shr:1
+// GFX12: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff]
+
+v_max3_i16_e64_dpp v5, v1, v2, vcc_lo row_shr:15
+// GFX12: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff]
+
+v_max3_i16_e64_dpp v5, v1, v2, ttmp15 row_ror:1
+// GFX12: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0xee,0x01,0x01,0x21,0x01,0xff]
+
+v_max3_i16_e64_dpp v5, v1, v2, exec_hi row_ror:15
+// GFX12: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff]
+
+v_max3_i16_e64_dpp v5, v1, v2, exec_lo row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff]
+
+v_max3_i16_e64_dpp v5, v1, v2, null row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01]
+
+v_max3_i16_e64_dpp v5, v1, v2, -1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x09,0x13]
+
+v_max3_i16_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: [0xff,0x00,0x4d,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30]
+
+v_max3_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x1d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
+
+v_max3_i32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x1d,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff]
+
+v_max3_i32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x1d,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff]
+
+v_max3_i32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3]
+// GFX12: [0x05,0x00,0x1d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff]
+
+v_max3_i32_e64_dpp v5, v1, v2, v3 row_mirror
+// GFX12: [0x05,0x00,0x1d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff]
+
+v_max3_i32_e64_dpp v5, v1, v2, v255 row_half_mirror
+// GFX12: [0x05,0x00,0x1d,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff]
+
+v_max3_i32_e64_dpp v5, v1, v2, s105 row_shl:1
+// GFX12: [0x05,0x00,0x1d,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff]
+
+v_max3_i32_e64_dpp v5, v1, v2, vcc_hi row_shl:15
+// GFX12: [0x05,0x00,0x1d,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff]
+
+v_max3_i32_e64_dpp v5, v1, v2, vcc_lo row_shr:1
+// GFX12: [0x05,0x00,0x1d,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff]
+
+v_max3_i32_e64_dpp v5, v1, v2, ttmp15 row_shr:15
+// GFX12: [0x05,0x00,0x1d,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff]
+
+v_max3_i32_e64_dpp v5, v1, v2, exec_hi row_ror:1
+// GFX12: [0x05,0x00,0x1d,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff]
+
+v_max3_i32_e64_dpp v5, v1, v2, exec_lo row_ror:15
+// GFX12: [0x05,0x00,0x1d,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff]
+
+v_max3_i32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: [0x05,0x00,0x1d,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff]
+
+v_max3_i32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: [0x05,0x00,0x1d,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01]
+
+v_max3_i32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: [0x05,0x00,0x1d,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13]
+
+v_max3_i32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: [0xff,0x00,0x1d,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30]
+
+v_max3_u16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
+
+v_max3_u16_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff]
+
+v_max3_u16_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x4e,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff]
+
+v_max3_u16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3]
+// GFX12: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff]
+
+v_max3_u16_e64_dpp v5, v1, v2, v3 row_mirror
+// GFX12: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff]
+
+v_max3_u16_e64_dpp v5, v1, v2, v3 row_half_mirror
+// GFX12: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff]
+
+v_max3_u16_e64_dpp v5, v1, v2, v255 row_shl:1
+// GFX12: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x01,0x01,0xff]
+
+v_max3_u16_e64_dpp v5, v1, v2, s105 row_shl:15
+// GFX12: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x0f,0x01,0xff]
+
+v_max3_u16_e64_dpp v5, v1, v2, vcc_hi row_shr:1
+// GFX12: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff]
+
+v_max3_u16_e64_dpp v5, v1, v2, vcc_lo row_shr:15
+// GFX12: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff]
+
+v_max3_u16_e64_dpp v5, v1, v2, ttmp15 row_ror:1
+// GFX12: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0xee,0x01,0x01,0x21,0x01,0xff]
+
+v_max3_u16_e64_dpp v5, v1, v2, exec_hi row_ror:15
+// GFX12: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff]
+
+v_max3_u16_e64_dpp v5, v1, v2, exec_lo row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff]
+
+v_max3_u16_e64_dpp v5, v1, v2, null row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01]
+
+v_max3_u16_e64_dpp v5, v1, v2, -1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x09,0x13]
+
+v_max3_u16_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: [0xff,0x00,0x4e,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30]
+
+v_max3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x1e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
+
+v_max3_u32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x1e,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff]
+
+v_max3_u32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x1e,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff]
+
+v_max3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3]
+// GFX12: [0x05,0x00,0x1e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff]
+
+v_max3_u32_e64_dpp v5, v1, v2, v3 row_mirror
+// GFX12: [0x05,0x00,0x1e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff]
+
+v_max3_u32_e64_dpp v5, v1, v2, v255 row_half_mirror
+// GFX12: [0x05,0x00,0x1e,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff]
+
+v_max3_u32_e64_dpp v5, v1, v2, s105 row_shl:1
+// GFX12: [0x05,0x00,0x1e,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff]
+
+v_max3_u32_e64_dpp v5, v1, v2, vcc_hi row_shl:15
+// GFX12: [0x05,0x00,0x1e,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff]
+
+v_max3_u32_e64_dpp v5, v1, v2, vcc_lo row_shr:1
+// GFX12: [0x05,0x00,0x1e,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff]
+
+v_max3_u32_e64_dpp v5, v1, v2, ttmp15 row_shr:15
+// GFX12: [0x05,0x00,0x1e,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff]
+
+v_max3_u32_e64_dpp v5, v1, v2, exec_hi row_ror:1
+// GFX12: [0x05,0x00,0x1e,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff]
+
+v_max3_u32_e64_dpp v5, v1, v2, exec_lo row_ror:15
+// GFX12: [0x05,0x00,0x1e,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff]
+
+v_max3_u32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: [0x05,0x00,0x1e,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff]
+
+v_max3_u32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: [0x05,0x00,0x1e,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01]
+
+v_max3_u32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: [0x05,0x00,0x1e,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13]
+
+v_max3_u32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: [0xff,0x00,0x1e,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30]
+
+v_max_i16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+
+v_max_i16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3]
+// GFX12: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+
+v_max_i16_e64_dpp v5, v1, v2 row_mirror
+// GFX12: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+
+v_max_i16_e64_dpp v5, v1, v2 row_half_mirror
+// GFX12: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+
+v_max_i16_e64_dpp v5, v1, v2 row_shl:1
+// GFX12: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+
+v_max_i16_e64_dpp v5, v1, v2 row_shl:15
+// GFX12: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+
+v_max_i16_e64_dpp v5, v1, v2 row_shr:1
+// GFX12: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+
+v_max_i16_e64_dpp v5, v1, v2 row_shr:15
+// GFX12: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+
+v_max_i16_e64_dpp v5, v1, v2 row_ror:1
+// GFX12: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+
+v_max_i16_e64_dpp v5, v1, v2 row_ror:15
+// GFX12: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+
+v_max_i16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+
+v_max_i16_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+
+v_max_i16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
+
+v_max_i16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: [0xff,0x00,0x0a,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30]
+
+v_max_u16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+
+v_max_u16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3]
+// GFX12: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+
+v_max_u16_e64_dpp v5, v1, v2 row_mirror
+// GFX12: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+
+v_max_u16_e64_dpp v5, v1, v2 row_half_mirror
+// GFX12: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+
+v_max_u16_e64_dpp v5, v1, v2 row_shl:1
+// GFX12: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+
+v_max_u16_e64_dpp v5, v1, v2 row_shl:15
+// GFX12: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+
+v_max_u16_e64_dpp v5, v1, v2 row_shr:1
+// GFX12: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+
+v_max_u16_e64_dpp v5, v1, v2 row_shr:15
+// GFX12: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+
+v_max_u16_e64_dpp v5, v1, v2 row_ror:1
+// GFX12: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+
+v_max_u16_e64_dpp v5, v1, v2 row_ror:15
+// GFX12: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+
+v_max_u16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+
+v_max_u16_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+
+v_max_u16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
+
+v_max_u16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: [0xff,0x00,0x09,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30]
+
+v_maxmin_num_f16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x6b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
+
+v_maxmin_num_f16_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x6b,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff]
+
+v_maxmin_num_f16_e64_dpp v5, v1, 2.0, v3 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x6b,0xd6,0xfa,0xe8,0x0d,0x04,0x01,0x1b,0x00,0xff]
+
+v_maxmin_num_f16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3]
+// GFX12: [0x05,0x00,0x6b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff]
+
+v_maxmin_num_f16_e64_dpp v5, v1, v2, v3 row_mirror
+// GFX12: [0x05,0x00,0x6b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff]
+
+v_maxmin_num_f16_e64_dpp v5, v1, v2, v255 row_half_mirror
+// GFX12: [0x05,0x00,0x6b,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff]
+
+v_maxmin_num_f16_e64_dpp v5, v1, v2, s105 row_shl:1
+// GFX12: [0x05,0x00,0x6b,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff]
+
+v_maxmin_num_f16_e64_dpp v5, v1, v2, vcc_hi row_shl:15
+// GFX12: [0x05,0x00,0x6b,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff]
+
+v_maxmin_num_f16_e64_dpp v5, v1, v2, vcc_lo row_shr:1
+// GFX12: [0x05,0x00,0x6b,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff]
+
+v_maxmin_num_f16_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15
+// GFX12: [0x05,0x01,0x6b,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff]
+
+v_maxmin_num_f16_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1
+// GFX12: [0x05,0x02,0x6b,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff]
+
+v_maxmin_num_f16_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15
+// GFX12: [0x05,0x04,0x6b,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff]
+
+v_maxmin_num_f16_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: [0x05,0x03,0x6b,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff]
+
+v_maxmin_num_f16_e64_dpp v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: [0x05,0x05,0x6b,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01]
+
+v_maxmin_num_f16_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: [0x05,0x06,0x6b,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x09,0x13]
+
+v_maxmin_num_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: [0xff,0x87,0x6b,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x05,0x30]
+
+v_maxmin_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x69,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
+
+v_maxmin_num_f32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x69,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff]
+
+v_maxmin_num_f32_e64_dpp v5, v1, 2.0, v3 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x69,0xd6,0xfa,0xe8,0x0d,0x04,0x01,0x1b,0x00,0xff]
+
+v_maxmin_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3]
+// GFX12: [0x05,0x00,0x69,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff]
+
+v_maxmin_num_f32_e64_dpp v5, v1, v2, v3 row_mirror
+// GFX12: [0x05,0x00,0x69,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff]
+
+v_maxmin_num_f32_e64_dpp v5, v1, v2, v255 row_half_mirror
+// GFX12: [0x05,0x00,0x69,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff]
+
+v_maxmin_num_f32_e64_dpp v5, v1, v2, s105 row_shl:1
+// GFX12: [0x05,0x00,0x69,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff]
+
+v_maxmin_num_f32_e64_dpp v5, v1, v2, vcc_hi row_shl:15
+// GFX12: [0x05,0x00,0x69,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff]
+
+v_maxmin_num_f32_e64_dpp v5, v1, v2, vcc_lo row_shr:1
+// GFX12: [0x05,0x00,0x69,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff]
+
+v_maxmin_num_f32_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15
+// GFX12: [0x05,0x01,0x69,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff]
+
+v_maxmin_num_f32_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1
+// GFX12: [0x05,0x02,0x69,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff]
+
+v_maxmin_num_f32_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15
+// GFX12: [0x05,0x04,0x69,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff]
+
+v_maxmin_num_f32_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: [0x05,0x03,0x69,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff]
+
+v_maxmin_num_f32_e64_dpp v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: [0x05,0x05,0x69,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01]
+
+v_maxmin_num_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: [0x05,0x06,0x69,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x09,0x13]
+
+v_maxmin_num_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: [0xff,0x87,0x69,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x05,0x30]
+
+v_maxmin_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x64,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
+
+v_maxmin_i32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x64,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff]
+
+v_maxmin_i32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x64,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff]
+
+v_maxmin_i32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3]
+// GFX12: [0x05,0x00,0x64,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff]
+
+v_maxmin_i32_e64_dpp v5, v1, v2, v3 row_mirror
+// GFX12: [0x05,0x00,0x64,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff]
+
+v_maxmin_i32_e64_dpp v5, v1, v2, v255 row_half_mirror
+// GFX12: [0x05,0x00,0x64,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff]
+
+v_maxmin_i32_e64_dpp v5, v1, v2, s105 row_shl:1
+// GFX12: [0x05,0x00,0x64,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff]
+
+v_maxmin_i32_e64_dpp v5, v1, v2, vcc_hi row_shl:15
+// GFX12: [0x05,0x00,0x64,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff]
+
+v_maxmin_i32_e64_dpp v5, v1, v2, vcc_lo row_shr:1
+// GFX12: [0x05,0x00,0x64,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff]
+
+v_maxmin_i32_e64_dpp v5, v1, v2, ttmp15 row_shr:15
+// GFX12: [0x05,0x00,0x64,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff]
+
+v_maxmin_i32_e64_dpp v5, v1, v2, exec_hi row_ror:1 +// GFX12: [0x05,0x00,0x64,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] + +v_maxmin_i32_e64_dpp v5, v1, v2, exec_lo row_ror:15 +// GFX12: [0x05,0x00,0x64,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] + +v_maxmin_i32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: [0x05,0x00,0x64,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] + +v_maxmin_i32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: [0x05,0x00,0x64,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] + +v_maxmin_i32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: [0x05,0x00,0x64,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13] + +v_maxmin_i32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: [0xff,0x00,0x64,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] + +v_maxmin_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x62,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_maxmin_u32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x62,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_maxmin_u32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x62,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_maxmin_u32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX12: [0x05,0x00,0x62,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_maxmin_u32_e64_dpp v5, v1, v2, v3 row_mirror +// GFX12: [0x05,0x00,0x62,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_maxmin_u32_e64_dpp v5, v1, v2, v255 row_half_mirror +// GFX12: [0x05,0x00,0x62,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] + +v_maxmin_u32_e64_dpp v5, v1, v2, s105 row_shl:1 +// GFX12: [0x05,0x00,0x62,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] + +v_maxmin_u32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 +// GFX12: [0x05,0x00,0x62,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] + +v_maxmin_u32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 +// GFX12: [0x05,0x00,0x62,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] + +v_maxmin_u32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 +// GFX12: [0x05,0x00,0x62,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] + +v_maxmin_u32_e64_dpp v5, v1, v2, exec_hi row_ror:1 +// GFX12: [0x05,0x00,0x62,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] + +v_maxmin_u32_e64_dpp v5, v1, v2, exec_lo row_ror:15 +// GFX12: [0x05,0x00,0x62,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] + +v_maxmin_u32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: [0x05,0x00,0x62,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] + +v_maxmin_u32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: [0x05,0x00,0x62,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] + +v_maxmin_u32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: [0x05,0x00,0x62,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13] + +v_maxmin_u32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: [0xff,0x00,0x62,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] + +v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x20,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] +// GFX12: [0x05,0x00,0x20,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 row_mirror +// GFX12: [0x05,0x00,0x20,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 row_half_mirror 
+// GFX12: [0x05,0x00,0x20,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 row_shl:1 +// GFX12: [0x05,0x00,0x20,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 row_shl:15 +// GFX12: [0x05,0x00,0x20,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 row_shr:1 +// GFX12: [0x05,0x00,0x20,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 row_shr:15 +// GFX12: [0x05,0x00,0x20,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 row_ror:1 +// GFX12: [0x05,0x00,0x20,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 row_ror:15 +// GFX12: [0x05,0x00,0x20,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: [0x05,0x00,0x20,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: [0x05,0x00,0x20,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] + +v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: [0x05,0x00,0x20,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] + +v_mbcnt_hi_u32_b32_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: [0xff,0x00,0x20,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] + +v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x1f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] +// GFX12: [0x05,0x00,0x1f,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 row_mirror +// GFX12: [0x05,0x00,0x1f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 row_half_mirror +// GFX12: [0x05,0x00,0x1f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 row_shl:1 +// GFX12: [0x05,0x00,0x1f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 row_shl:15 +// GFX12: [0x05,0x00,0x1f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 row_shr:1 +// GFX12: [0x05,0x00,0x1f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 row_shr:15 +// GFX12: [0x05,0x00,0x1f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 row_ror:1 +// GFX12: [0x05,0x00,0x1f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 row_ror:15 +// GFX12: [0x05,0x00,0x1f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: [0x05,0x00,0x1f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: [0x05,0x00,0x1f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] + +v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: [0x05,0x00,0x1f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] + +v_mbcnt_lo_u32_b32_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: [0xff,0x00,0x1f,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] + +v_med3_num_f16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x32,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + 
+v_med3_num_f16_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x32,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_med3_num_f16_e64_dpp v5, v1, 2.0, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x32,0xd6,0xfa,0xe8,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_med3_num_f16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX12: [0x05,0x00,0x32,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_med3_num_f16_e64_dpp v5, v1, v2, v3 row_mirror +// GFX12: [0x05,0x00,0x32,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_med3_num_f16_e64_dpp v5, v1, v2, v255 row_half_mirror +// GFX12: [0x05,0x00,0x32,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] + +v_med3_num_f16_e64_dpp v5, v1, v2, s105 row_shl:1 +// GFX12: [0x05,0x00,0x32,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] + +v_med3_num_f16_e64_dpp v5, v1, v2, vcc_hi row_shl:15 +// GFX12: [0x05,0x00,0x32,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] + +v_med3_num_f16_e64_dpp v5, v1, v2, vcc_lo row_shr:1 +// GFX12: [0x05,0x00,0x32,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] + +v_med3_num_f16_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15 +// GFX12: [0x05,0x01,0x32,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] + +v_med3_num_f16_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 +// GFX12: [0x05,0x02,0x32,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] + +v_med3_num_f16_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 +// GFX12: [0x05,0x04,0x32,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] + +v_med3_num_f16_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: [0x05,0x03,0x32,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] + +v_med3_num_f16_e64_dpp v5, -|v1|, v2, -|-1| row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: [0x05,0x05,0x32,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01] + +v_med3_num_f16_e64_dpp v5, v1, -|v2|, -|0.5| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: [0x05,0x06,0x32,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x09,0x13] + +v_med3_num_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: [0xff,0x87,0x32,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x05,0x30] + +v_med3_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x31,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_med3_num_f32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x31,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_med3_num_f32_e64_dpp v5, v1, 2.0, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x31,0xd6,0xfa,0xe8,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_med3_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX12: [0x05,0x00,0x31,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_med3_num_f32_e64_dpp v5, v1, v2, v3 row_mirror +// GFX12: [0x05,0x00,0x31,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_med3_num_f32_e64_dpp v5, v1, v2, v255 row_half_mirror +// GFX12: [0x05,0x00,0x31,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] + +v_med3_num_f32_e64_dpp v5, v1, v2, s105 row_shl:1 +// GFX12: [0x05,0x00,0x31,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] + +v_med3_num_f32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 +// GFX12: [0x05,0x00,0x31,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] + +v_med3_num_f32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 +// GFX12: [0x05,0x00,0x31,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] + +v_med3_num_f32_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15 +// GFX12: [0x05,0x01,0x31,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] + +v_med3_num_f32_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 +// GFX12: 
[0x05,0x02,0x31,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] + +v_med3_num_f32_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 +// GFX12: [0x05,0x04,0x31,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] + +v_med3_num_f32_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: [0x05,0x03,0x31,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] + +v_med3_num_f32_e64_dpp v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: [0x05,0x05,0x31,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01] + +v_med3_num_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: [0x05,0x06,0x31,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x09,0x13] + +v_med3_num_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: [0xff,0x87,0x31,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x05,0x30] + +v_med3_i16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x50,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_med3_i16_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x50,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_med3_i16_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x50,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_med3_i16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX12: [0x05,0x00,0x50,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_med3_i16_e64_dpp v5, v1, v2, v3 row_mirror +// GFX12: [0x05,0x00,0x50,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_med3_i16_e64_dpp v5, v1, v2, v3 row_half_mirror +// GFX12: [0x05,0x00,0x50,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] + +v_med3_i16_e64_dpp v5, v1, v2, v255 row_shl:1 +// GFX12: [0x05,0x00,0x50,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x01,0x01,0xff] + +v_med3_i16_e64_dpp v5, v1, v2, s105 row_shl:15 +// GFX12: [0x05,0x00,0x50,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x0f,0x01,0xff] + +v_med3_i16_e64_dpp v5, v1, v2, vcc_hi row_shr:1 +// GFX12: [0x05,0x00,0x50,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff] + +v_med3_i16_e64_dpp v5, v1, v2, vcc_lo row_shr:15 +// GFX12: [0x05,0x00,0x50,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff] + +v_med3_i16_e64_dpp v5, v1, v2, ttmp15 row_ror:1 +// GFX12: [0x05,0x00,0x50,0xd6,0xfa,0x04,0xee,0x01,0x01,0x21,0x01,0xff] + +v_med3_i16_e64_dpp v5, v1, v2, exec_hi row_ror:15 +// GFX12: [0x05,0x00,0x50,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] + +v_med3_i16_e64_dpp v5, v1, v2, exec_lo row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: [0x05,0x00,0x50,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] + +v_med3_i16_e64_dpp v5, v1, v2, null row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: [0x05,0x00,0x50,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] + +v_med3_i16_e64_dpp v5, v1, v2, -1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: [0x05,0x00,0x50,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x09,0x13] + +v_med3_i16_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: [0xff,0x00,0x50,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] + +v_med3_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x20,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_med3_i32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x20,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_med3_i32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x20,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_med3_i32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX12: 
[0x05,0x00,0x20,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_med3_i32_e64_dpp v5, v1, v2, v3 row_mirror +// GFX12: [0x05,0x00,0x20,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_med3_i32_e64_dpp v5, v1, v2, v255 row_half_mirror +// GFX12: [0x05,0x00,0x20,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] + +v_med3_i32_e64_dpp v5, v1, v2, s105 row_shl:1 +// GFX12: [0x05,0x00,0x20,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] + +v_med3_i32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 +// GFX12: [0x05,0x00,0x20,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] + +v_med3_i32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 +// GFX12: [0x05,0x00,0x20,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] + +v_med3_i32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 +// GFX12: [0x05,0x00,0x20,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] + +v_med3_i32_e64_dpp v5, v1, v2, exec_hi row_ror:1 +// GFX12: [0x05,0x00,0x20,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] + +v_med3_i32_e64_dpp v5, v1, v2, exec_lo row_ror:15 +// GFX12: [0x05,0x00,0x20,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] + +v_med3_i32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: [0x05,0x00,0x20,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] + +v_med3_i32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: [0x05,0x00,0x20,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] + +v_med3_i32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: [0x05,0x00,0x20,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13] + +v_med3_i32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: [0xff,0x00,0x20,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] + +v_med3_u16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x51,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_med3_u16_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x51,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_med3_u16_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x51,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_med3_u16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX12: [0x05,0x00,0x51,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_med3_u16_e64_dpp v5, v1, v2, v3 row_mirror +// GFX12: [0x05,0x00,0x51,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_med3_u16_e64_dpp v5, v1, v2, v3 row_half_mirror +// GFX12: [0x05,0x00,0x51,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] + +v_med3_u16_e64_dpp v5, v1, v2, v255 row_shl:1 +// GFX12: [0x05,0x00,0x51,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x01,0x01,0xff] + +v_med3_u16_e64_dpp v5, v1, v2, s105 row_shl:15 +// GFX12: [0x05,0x00,0x51,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x0f,0x01,0xff] + +v_med3_u16_e64_dpp v5, v1, v2, vcc_hi row_shr:1 +// GFX12: [0x05,0x00,0x51,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff] + +v_med3_u16_e64_dpp v5, v1, v2, vcc_lo row_shr:15 +// GFX12: [0x05,0x00,0x51,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff] + +v_med3_u16_e64_dpp v5, v1, v2, ttmp15 row_ror:1 +// GFX12: [0x05,0x00,0x51,0xd6,0xfa,0x04,0xee,0x01,0x01,0x21,0x01,0xff] + +v_med3_u16_e64_dpp v5, v1, v2, exec_hi row_ror:15 +// GFX12: [0x05,0x00,0x51,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] + +v_med3_u16_e64_dpp v5, v1, v2, exec_lo row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: [0x05,0x00,0x51,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] + +v_med3_u16_e64_dpp v5, v1, v2, null row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: [0x05,0x00,0x51,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] + +v_med3_u16_e64_dpp v5, v1, 
v2, -1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: [0x05,0x00,0x51,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x09,0x13] + +v_med3_u16_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: [0xff,0x00,0x51,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] + +v_med3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x21,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_med3_u32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x21,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_med3_u32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x21,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_med3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX12: [0x05,0x00,0x21,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_med3_u32_e64_dpp v5, v1, v2, v3 row_mirror +// GFX12: [0x05,0x00,0x21,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_med3_u32_e64_dpp v5, v1, v2, v255 row_half_mirror +// GFX12: [0x05,0x00,0x21,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] + +v_med3_u32_e64_dpp v5, v1, v2, s105 row_shl:1 +// GFX12: [0x05,0x00,0x21,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] + +v_med3_u32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 +// GFX12: [0x05,0x00,0x21,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] + +v_med3_u32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 +// GFX12: [0x05,0x00,0x21,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] + +v_med3_u32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 +// GFX12: [0x05,0x00,0x21,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] + +v_med3_u32_e64_dpp v5, v1, v2, exec_hi row_ror:1 +// GFX12: [0x05,0x00,0x21,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] + +v_med3_u32_e64_dpp v5, v1, v2, exec_lo row_ror:15 +// GFX12: [0x05,0x00,0x21,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] + +v_med3_u32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: [0x05,0x00,0x21,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] + +v_med3_u32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: [0x05,0x00,0x21,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] + +v_med3_u32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: [0x05,0x00,0x21,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13] + +v_med3_u32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: [0xff,0x00,0x21,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] + +v_min3_num_f16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x2b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_min3_num_f16_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x2b,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_min3_num_f16_e64_dpp v5, v1, 2.0, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x2b,0xd6,0xfa,0xe8,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_min3_num_f16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX12: [0x05,0x00,0x2b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_min3_num_f16_e64_dpp v5, v1, v2, v3 row_mirror +// GFX12: [0x05,0x00,0x2b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_min3_num_f16_e64_dpp v5, v1, v2, v255 row_half_mirror +// GFX12: [0x05,0x00,0x2b,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] + +v_min3_num_f16_e64_dpp v5, v1, v2, s105 row_shl:1 +// GFX12: [0x05,0x00,0x2b,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] + +v_min3_num_f16_e64_dpp v5, v1, v2, vcc_hi row_shl:15 +// GFX12: [0x05,0x00,0x2b,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] + +v_min3_num_f16_e64_dpp 
v5, v1, v2, vcc_lo row_shr:1 +// GFX12: [0x05,0x00,0x2b,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] + +v_min3_num_f16_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15 +// GFX12: [0x05,0x01,0x2b,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] + +v_min3_num_f16_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 +// GFX12: [0x05,0x02,0x2b,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] + +v_min3_num_f16_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 +// GFX12: [0x05,0x04,0x2b,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] + +v_min3_num_f16_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: [0x05,0x03,0x2b,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] + +v_min3_num_f16_e64_dpp v5, -|v1|, v2, -|-1| row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: [0x05,0x05,0x2b,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01] + +v_min3_num_f16_e64_dpp v5, v1, -|v2|, -|0.5| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: [0x05,0x06,0x2b,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x09,0x13] + +v_min3_num_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: [0xff,0x87,0x2b,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x05,0x30] + +v_min3_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x29,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_min3_num_f32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x29,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_min3_num_f32_e64_dpp v5, v1, 2.0, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x29,0xd6,0xfa,0xe8,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_min3_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX12: [0x05,0x00,0x29,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_min3_num_f32_e64_dpp v5, v1, v2, v3 row_mirror +// GFX12: [0x05,0x00,0x29,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_min3_num_f32_e64_dpp v5, v1, v2, v255 row_half_mirror +// GFX12: [0x05,0x00,0x29,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] + +v_min3_num_f32_e64_dpp v5, v1, v2, s105 row_shl:1 +// GFX12: [0x05,0x00,0x29,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] + +v_min3_num_f32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 +// GFX12: [0x05,0x00,0x29,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] + +v_min3_num_f32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 +// GFX12: [0x05,0x00,0x29,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] + +v_min3_num_f32_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15 +// GFX12: [0x05,0x01,0x29,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] + +v_min3_num_f32_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 +// GFX12: [0x05,0x02,0x29,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] + +v_min3_num_f32_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 +// GFX12: [0x05,0x04,0x29,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] + +v_min3_num_f32_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: [0x05,0x03,0x29,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] + +v_min3_num_f32_e64_dpp v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: [0x05,0x05,0x29,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01] + +v_min3_num_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: [0x05,0x06,0x29,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x09,0x13] + +v_min3_num_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: [0xff,0x87,0x29,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x05,0x30] + +v_min3_i16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX12: 
[0x05,0x00,0x4a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_min3_i16_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_min3_i16_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x4a,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_min3_i16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX12: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_min3_i16_e64_dpp v5, v1, v2, v3 row_mirror +// GFX12: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_min3_i16_e64_dpp v5, v1, v2, v3 row_half_mirror +// GFX12: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] + +v_min3_i16_e64_dpp v5, v1, v2, v255 row_shl:1 +// GFX12: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x01,0x01,0xff] + +v_min3_i16_e64_dpp v5, v1, v2, s105 row_shl:15 +// GFX12: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x0f,0x01,0xff] + +v_min3_i16_e64_dpp v5, v1, v2, vcc_hi row_shr:1 +// GFX12: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff] + +v_min3_i16_e64_dpp v5, v1, v2, vcc_lo row_shr:15 +// GFX12: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff] + +v_min3_i16_e64_dpp v5, v1, v2, ttmp15 row_ror:1 +// GFX12: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0xee,0x01,0x01,0x21,0x01,0xff] + +v_min3_i16_e64_dpp v5, v1, v2, exec_hi row_ror:15 +// GFX12: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] + +v_min3_i16_e64_dpp v5, v1, v2, exec_lo row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] + +v_min3_i16_e64_dpp v5, v1, v2, null row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] + +v_min3_i16_e64_dpp v5, v1, v2, -1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x09,0x13] + +v_min3_i16_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: [0xff,0x00,0x4a,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] + +v_min3_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x1a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_min3_i32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x1a,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_min3_i32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x1a,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_min3_i32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX12: [0x05,0x00,0x1a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_min3_i32_e64_dpp v5, v1, v2, v3 row_mirror +// GFX12: [0x05,0x00,0x1a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_min3_i32_e64_dpp v5, v1, v2, v255 row_half_mirror +// GFX12: [0x05,0x00,0x1a,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] + +v_min3_i32_e64_dpp v5, v1, v2, s105 row_shl:1 +// GFX12: [0x05,0x00,0x1a,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] + +v_min3_i32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 +// GFX12: [0x05,0x00,0x1a,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] + +v_min3_i32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 +// GFX12: [0x05,0x00,0x1a,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] + +v_min3_i32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 +// GFX12: [0x05,0x00,0x1a,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] + +v_min3_i32_e64_dpp v5, v1, v2, exec_hi row_ror:1 +// GFX12: [0x05,0x00,0x1a,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] + +v_min3_i32_e64_dpp v5, v1, v2, exec_lo row_ror:15 +// GFX12: 
[0x05,0x00,0x1a,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] + +v_min3_i32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: [0x05,0x00,0x1a,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] + +v_min3_i32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: [0x05,0x00,0x1a,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] + +v_min3_i32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: [0x05,0x00,0x1a,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13] + +v_min3_i32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: [0xff,0x00,0x1a,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] + +v_min3_u16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_min3_u16_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_min3_u16_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x4b,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_min3_u16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX12: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_min3_u16_e64_dpp v5, v1, v2, v3 row_mirror +// GFX12: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_min3_u16_e64_dpp v5, v1, v2, v3 row_half_mirror +// GFX12: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] + +v_min3_u16_e64_dpp v5, v1, v2, v255 row_shl:1 +// GFX12: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x01,0x01,0xff] + +v_min3_u16_e64_dpp v5, v1, v2, s105 row_shl:15 +// GFX12: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x0f,0x01,0xff] + +v_min3_u16_e64_dpp v5, v1, v2, vcc_hi row_shr:1 +// GFX12: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff] + +v_min3_u16_e64_dpp v5, v1, v2, vcc_lo row_shr:15 +// GFX12: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff] + +v_min3_u16_e64_dpp v5, v1, v2, ttmp15 row_ror:1 +// GFX12: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0xee,0x01,0x01,0x21,0x01,0xff] + +v_min3_u16_e64_dpp v5, v1, v2, exec_hi row_ror:15 +// GFX12: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] + +v_min3_u16_e64_dpp v5, v1, v2, exec_lo row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] + +v_min3_u16_e64_dpp v5, v1, v2, null row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] + +v_min3_u16_e64_dpp v5, v1, v2, -1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x09,0x13] + +v_min3_u16_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: [0xff,0x00,0x4b,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] + +v_min3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_min3_u32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_min3_u32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x1b,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_min3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX12: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_min3_u32_e64_dpp v5, v1, v2, v3 row_mirror +// GFX12: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_min3_u32_e64_dpp v5, v1, v2, v255 
row_half_mirror +// GFX12: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] + +v_min3_u32_e64_dpp v5, v1, v2, s105 row_shl:1 +// GFX12: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] + +v_min3_u32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 +// GFX12: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] + +v_min3_u32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 +// GFX12: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] + +v_min3_u32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 +// GFX12: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] + +v_min3_u32_e64_dpp v5, v1, v2, exec_hi row_ror:1 +// GFX12: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] + +v_min3_u32_e64_dpp v5, v1, v2, exec_lo row_ror:15 +// GFX12: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] + +v_min3_u32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] + +v_min3_u32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] + +v_min3_u32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13] + +v_min3_u32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: [0xff,0x00,0x1b,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] + +v_min_i16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_min_i16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] +// GFX12: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_min_i16_e64_dpp v5, v1, v2 row_mirror +// GFX12: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_min_i16_e64_dpp v5, v1, v2 row_half_mirror +// GFX12: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_min_i16_e64_dpp v5, v1, v2 row_shl:1 +// GFX12: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_min_i16_e64_dpp v5, v1, v2 row_shl:15 +// GFX12: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_min_i16_e64_dpp v5, v1, v2 row_shr:1 +// GFX12: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_min_i16_e64_dpp v5, v1, v2 row_shr:15 +// GFX12: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_min_i16_e64_dpp v5, v1, v2 row_ror:1 +// GFX12: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_min_i16_e64_dpp v5, v1, v2 row_ror:15 +// GFX12: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_min_i16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_min_i16_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] + +v_min_i16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] + +v_min_i16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: [0xff,0x00,0x0c,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] + +v_min_u16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_min_u16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] +// GFX12: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_min_u16_e64_dpp 
v5, v1, v2 row_mirror +// GFX12: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_min_u16_e64_dpp v5, v1, v2 row_half_mirror +// GFX12: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_min_u16_e64_dpp v5, v1, v2 row_shl:1 +// GFX12: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_min_u16_e64_dpp v5, v1, v2 row_shl:15 +// GFX12: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_min_u16_e64_dpp v5, v1, v2 row_shr:1 +// GFX12: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_min_u16_e64_dpp v5, v1, v2 row_shr:15 +// GFX12: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_min_u16_e64_dpp v5, v1, v2 row_ror:1 +// GFX12: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_min_u16_e64_dpp v5, v1, v2 row_ror:15 +// GFX12: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_min_u16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_min_u16_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] + +v_min_u16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] + +v_min_u16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: [0xff,0x00,0x0b,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] + +v_minmax_num_f16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x6a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_minmax_num_f16_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x6a,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_minmax_num_f16_e64_dpp v5, v1, 2.0, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x6a,0xd6,0xfa,0xe8,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_minmax_num_f16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX12: [0x05,0x00,0x6a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_minmax_num_f16_e64_dpp v5, v1, v2, v3 row_mirror +// GFX12: [0x05,0x00,0x6a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_minmax_num_f16_e64_dpp v5, v1, v2, v255 row_half_mirror +// GFX12: [0x05,0x00,0x6a,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] + +v_minmax_num_f16_e64_dpp v5, v1, v2, s105 row_shl:1 +// GFX12: [0x05,0x00,0x6a,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] + +v_minmax_num_f16_e64_dpp v5, v1, v2, vcc_hi row_shl:15 +// GFX12: [0x05,0x00,0x6a,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] + +v_minmax_num_f16_e64_dpp v5, v1, v2, vcc_lo row_shr:1 +// GFX12: [0x05,0x00,0x6a,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] + +v_minmax_num_f16_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15 +// GFX12: [0x05,0x01,0x6a,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] + +v_minmax_num_f16_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 +// GFX12: [0x05,0x02,0x6a,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] + +v_minmax_num_f16_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 +// GFX12: [0x05,0x04,0x6a,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] + +v_minmax_num_f16_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: [0x05,0x03,0x6a,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] + +v_minmax_num_f16_e64_dpp v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: [0x05,0x05,0x6a,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01] + +v_minmax_num_f16_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 
bound_ctrl:1 fi:0 +// GFX12: [0x05,0x06,0x6a,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x09,0x13] + +v_minmax_num_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: [0xff,0x87,0x6a,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x05,0x30] + +v_minmax_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x68,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_minmax_num_f32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x68,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_minmax_num_f32_e64_dpp v5, v1, 2.0, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x68,0xd6,0xfa,0xe8,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_minmax_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX12: [0x05,0x00,0x68,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_minmax_num_f32_e64_dpp v5, v1, v2, v3 row_mirror +// GFX12: [0x05,0x00,0x68,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_minmax_num_f32_e64_dpp v5, v1, v2, v255 row_half_mirror +// GFX12: [0x05,0x00,0x68,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] + +v_minmax_num_f32_e64_dpp v5, v1, v2, s105 row_shl:1 +// GFX12: [0x05,0x00,0x68,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] + +v_minmax_num_f32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 +// GFX12: [0x05,0x00,0x68,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] + +v_minmax_num_f32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 +// GFX12: [0x05,0x00,0x68,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] + +v_minmax_num_f32_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15 +// GFX12: [0x05,0x01,0x68,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] + +v_minmax_num_f32_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 +// GFX12: [0x05,0x02,0x68,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] + +v_minmax_num_f32_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 +// GFX12: [0x05,0x04,0x68,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] + +v_minmax_num_f32_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: [0x05,0x03,0x68,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] + +v_minmax_num_f32_e64_dpp v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: [0x05,0x05,0x68,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01] + +v_minmax_num_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: [0x05,0x06,0x68,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x09,0x13] + +v_minmax_num_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: [0xff,0x87,0x68,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x05,0x30] + +v_minmax_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x65,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_minmax_i32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x65,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_minmax_i32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x65,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_minmax_i32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX12: [0x05,0x00,0x65,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_minmax_i32_e64_dpp v5, v1, v2, v3 row_mirror +// GFX12: [0x05,0x00,0x65,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_minmax_i32_e64_dpp v5, v1, v2, v255 row_half_mirror +// GFX12: [0x05,0x00,0x65,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] + +v_minmax_i32_e64_dpp v5, v1, v2, s105 row_shl:1 +// GFX12: [0x05,0x00,0x65,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] + +v_minmax_i32_e64_dpp v5, v1, 
v2, vcc_hi row_shl:15 +// GFX12: [0x05,0x00,0x65,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] + +v_minmax_i32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 +// GFX12: [0x05,0x00,0x65,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] + +v_minmax_i32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 +// GFX12: [0x05,0x00,0x65,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] + +v_minmax_i32_e64_dpp v5, v1, v2, exec_hi row_ror:1 +// GFX12: [0x05,0x00,0x65,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] + +v_minmax_i32_e64_dpp v5, v1, v2, exec_lo row_ror:15 +// GFX12: [0x05,0x00,0x65,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] + +v_minmax_i32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: [0x05,0x00,0x65,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] + +v_minmax_i32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: [0x05,0x00,0x65,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] + +v_minmax_i32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: [0x05,0x00,0x65,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13] + +v_minmax_i32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: [0xff,0x00,0x65,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] + +v_minmax_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x63,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_minmax_u32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x63,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_minmax_u32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x63,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_minmax_u32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX12: [0x05,0x00,0x63,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_minmax_u32_e64_dpp v5, v1, v2, v3 row_mirror +// GFX12: [0x05,0x00,0x63,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_minmax_u32_e64_dpp v5, v1, v2, v255 row_half_mirror +// GFX12: [0x05,0x00,0x63,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] + +v_minmax_u32_e64_dpp v5, v1, v2, s105 row_shl:1 +// GFX12: [0x05,0x00,0x63,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] + +v_minmax_u32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 +// GFX12: [0x05,0x00,0x63,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] + +v_minmax_u32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 +// GFX12: [0x05,0x00,0x63,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] + +v_minmax_u32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 +// GFX12: [0x05,0x00,0x63,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] + +v_minmax_u32_e64_dpp v5, v1, v2, exec_hi row_ror:1 +// GFX12: [0x05,0x00,0x63,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] + +v_minmax_u32_e64_dpp v5, v1, v2, exec_lo row_ror:15 +// GFX12: [0x05,0x00,0x63,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] + +v_minmax_u32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: [0x05,0x00,0x63,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] + +v_minmax_u32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: [0x05,0x00,0x63,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] + +v_minmax_u32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: [0x05,0x00,0x63,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13] + +v_minmax_u32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: [0xff,0x00,0x63,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] + +v_msad_u8_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX12: 
[0x05,0x00,0x39,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
+
+v_msad_u8_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x39,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff]
+
+v_msad_u8_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3]
+// GFX12: [0x05,0x00,0x39,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff]
+
+v_msad_u8_e64_dpp v5, v1, v2, v3 row_mirror
+// GFX12: [0x05,0x00,0x39,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff]
+
+v_msad_u8_e64_dpp v5, v1, v2, v255 row_half_mirror
+// GFX12: [0x05,0x00,0x39,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff]
+
+v_msad_u8_e64_dpp v5, v1, v2, s105 row_shl:1
+// GFX12: [0x05,0x00,0x39,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff]
+
+v_msad_u8_e64_dpp v5, v1, v2, vcc_hi row_shl:15
+// GFX12: [0x05,0x00,0x39,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff]
+
+v_msad_u8_e64_dpp v5, v1, v2, vcc_lo row_shr:1
+// GFX12: [0x05,0x00,0x39,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff]
+
+v_msad_u8_e64_dpp v5, v1, v2, ttmp15 row_shr:15
+// GFX12: [0x05,0x00,0x39,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff]
+
+v_msad_u8_e64_dpp v5, v1, v2, exec_hi row_ror:1
+// GFX12: [0x05,0x00,0x39,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff]
+
+v_msad_u8_e64_dpp v5, v1, v2, exec_lo row_ror:15
+// GFX12: [0x05,0x00,0x39,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff]
+
+v_msad_u8_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: [0x05,0x00,0x39,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff]
+
+v_msad_u8_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: [0x05,0x00,0x39,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01]
+
+v_msad_u8_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: [0x05,0x00,0x39,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13]
+
+v_msad_u8_e64_dpp v255, v255, v255, src_scc clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: [0xff,0x80,0x39,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30]
+
+v_mul_lo_u16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+
+v_mul_lo_u16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3]
+// GFX12: [0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+
+v_mul_lo_u16_e64_dpp v5, v1, v2 row_mirror
+// GFX12: [0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+
+v_mul_lo_u16_e64_dpp v5, v1, v2 row_half_mirror
+// GFX12: [0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+
+v_mul_lo_u16_e64_dpp v5, v1, v2 row_shl:1
+// GFX12: [0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+
+v_mul_lo_u16_e64_dpp v5, v1, v2 row_shl:15
+// GFX12: [0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+
+v_mul_lo_u16_e64_dpp v5, v1, v2 row_shr:1
+// GFX12: [0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+
+v_mul_lo_u16_e64_dpp v5, v1, v2 row_shr:15
+// GFX12: [0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+
+v_mul_lo_u16_e64_dpp v5, v1, v2 row_ror:1
+// GFX12: [0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+
+v_mul_lo_u16_e64_dpp v5, v1, v2 row_ror:15
+// GFX12: [0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+
+v_mul_lo_u16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: [0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+
+v_mul_lo_u16_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: [0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+
+v_mul_lo_u16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: [0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
+
+v_mul_lo_u16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: [0xff,0x00,0x05,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30]
+
+v_mullit_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x18,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
+
+v_mullit_f32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x18,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff]
+
+v_mullit_f32_e64_dpp v5, v1, 2.0, v3 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x18,0xd6,0xfa,0xe8,0x0d,0x04,0x01,0x1b,0x00,0xff]
+
+v_mullit_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3]
+// GFX12: [0x05,0x00,0x18,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff]
+
+v_mullit_f32_e64_dpp v5, v1, v2, v3 row_mirror
+// GFX12: [0x05,0x00,0x18,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff]
+
+v_mullit_f32_e64_dpp v5, v1, v2, v255 row_half_mirror
+// GFX12: [0x05,0x00,0x18,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff]
+
+v_mullit_f32_e64_dpp v5, v1, v2, s105 row_shl:1
+// GFX12: [0x05,0x00,0x18,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff]
+
+v_mullit_f32_e64_dpp v5, v1, v2, vcc_hi row_shl:15
+// GFX12: [0x05,0x00,0x18,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff]
+
+v_mullit_f32_e64_dpp v5, v1, v2, vcc_lo row_shr:1
+// GFX12: [0x05,0x00,0x18,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff]
+
+v_mullit_f32_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15
+// GFX12: [0x05,0x01,0x18,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff]
+
+v_mullit_f32_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1
+// GFX12: [0x05,0x02,0x18,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff]
+
+v_mullit_f32_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15
+// GFX12: [0x05,0x04,0x18,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff]
+
+v_mullit_f32_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: [0x05,0x03,0x18,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff]
+
+v_mullit_f32_e64_dpp v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: [0x05,0x05,0x18,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01]
+
+v_mullit_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: [0x05,0x06,0x18,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x09,0x13]
+
+v_mullit_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: [0xff,0x87,0x18,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x05,0x30]
+
+v_or3_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x58,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
+
+v_or3_b32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x58,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff]
+
+v_or3_b32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x58,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff]
+
+v_or3_b32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3]
+// GFX12: [0x05,0x00,0x58,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff]
+
+v_or3_b32_e64_dpp v5, v1, v2, v3 row_mirror
+// GFX12: [0x05,0x00,0x58,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff]
+
+v_or3_b32_e64_dpp v5, v1, v2, v255 row_half_mirror
+// GFX12: [0x05,0x00,0x58,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff]
+
+v_or3_b32_e64_dpp v5, v1, v2, s105 row_shl:1
+// GFX12: [0x05,0x00,0x58,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff]
+
+v_or3_b32_e64_dpp v5, v1, v2, vcc_hi row_shl:15
+// GFX12: [0x05,0x00,0x58,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff]
+
+v_or3_b32_e64_dpp v5, v1, v2, vcc_lo row_shr:1
+// GFX12: [0x05,0x00,0x58,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff]
+
+v_or3_b32_e64_dpp v5, v1, v2, ttmp15 row_shr:15
+// GFX12: [0x05,0x00,0x58,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff]
+
+v_or3_b32_e64_dpp v5, v1, v2, exec_hi row_ror:1
+// GFX12: [0x05,0x00,0x58,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff]
+
+v_or3_b32_e64_dpp v5, v1, v2, exec_lo row_ror:15
+// GFX12: [0x05,0x00,0x58,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff]
+
+v_or3_b32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: [0x05,0x00,0x58,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff]
+
+v_or3_b32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: [0x05,0x00,0x58,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01]
+
+v_or3_b32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: [0x05,0x00,0x58,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13]
+
+v_or3_b32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: [0xff,0x00,0x58,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30]
+
+v_or_b16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+
+v_or_b16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3]
+// GFX12: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+
+v_or_b16_e64_dpp v5, v1, v2 row_mirror
+// GFX12: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+
+v_or_b16_e64_dpp v5, v1, v2 row_half_mirror
+// GFX12: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+
+v_or_b16_e64_dpp v5, v1, v2 row_shl:1
+// GFX12: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+
+v_or_b16_e64_dpp v5, v1, v2 row_shl:15
+// GFX12: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+
+v_or_b16_e64_dpp v5, v1, v2 row_shr:1
+// GFX12: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+
+v_or_b16_e64_dpp v5, v1, v2 row_shr:15
+// GFX12: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+
+v_or_b16_e64_dpp v5, v1, v2 row_ror:1
+// GFX12: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+
+v_or_b16_e64_dpp v5, v1, v2 row_ror:15
+// GFX12: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+
+v_or_b16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+
+v_or_b16_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+
+v_or_b16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
+
+v_or_b16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: [0xff,0x00,0x63,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30]
+
+v_pack_b32_f16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+
+v_pack_b32_f16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3]
+// GFX12: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+
+v_pack_b32_f16_e64_dpp v5, v1, v2 row_mirror
+// GFX12: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+
+v_pack_b32_f16_e64_dpp v5, v1, v2 row_half_mirror
+// GFX12: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+
+v_pack_b32_f16_e64_dpp v5, v1, v2 row_shl:1
+// GFX12: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+
+v_pack_b32_f16_e64_dpp v5, v1, v2 row_shl:15
+// GFX12: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+
+v_pack_b32_f16_e64_dpp v5, v1, v2 row_shr:1
+// GFX12: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+
+v_pack_b32_f16_e64_dpp v5, v1, v2 row_shr:15
+// GFX12: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+
+v_pack_b32_f16_e64_dpp v5, v1, v2 row_ror:1
+// GFX12: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+
+v_pack_b32_f16_e64_dpp v5, v1, v2 row_ror:15
+// GFX12: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+
+v_pack_b32_f16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+
+v_pack_b32_f16_e64_dpp v5, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: [0x05,0x01,0x11,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+
+v_pack_b32_f16_e64_dpp v5, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: [0x05,0x02,0x11,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+
+v_pack_b32_f16_e64_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: [0xff,0x03,0x11,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
+
+v_perm_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x44,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
+
+v_perm_b32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x44,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff]
+
+v_perm_b32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x44,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff]
+
+v_perm_b32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3]
+// GFX12: [0x05,0x00,0x44,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff]
+
+v_perm_b32_e64_dpp v5, v1, v2, v3 row_mirror
+// GFX12: [0x05,0x00,0x44,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff]
+
+v_perm_b32_e64_dpp v5, v1, v2, v255 row_half_mirror
+// GFX12: [0x05,0x00,0x44,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff]
+
+v_perm_b32_e64_dpp v5, v1, v2, s105 row_shl:1
+// GFX12: [0x05,0x00,0x44,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff]
+
+v_perm_b32_e64_dpp v5, v1, v2, vcc_hi row_shl:15
+// GFX12: [0x05,0x00,0x44,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff]
+
+v_perm_b32_e64_dpp v5, v1, v2, vcc_lo row_shr:1
+// GFX12: [0x05,0x00,0x44,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff]
+
+v_perm_b32_e64_dpp v5, v1, v2, ttmp15 row_shr:15
+// GFX12: [0x05,0x00,0x44,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff]
+
+v_perm_b32_e64_dpp v5, v1, v2, exec_hi row_ror:1
+// GFX12: [0x05,0x00,0x44,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff]
+
+v_perm_b32_e64_dpp v5, v1, v2, exec_lo row_ror:15
+// GFX12: [0x05,0x00,0x44,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff]
+
+v_perm_b32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: [0x05,0x00,0x44,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff]
+
+v_perm_b32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: [0x05,0x00,0x44,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01]
+
+v_perm_b32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: [0x05,0x00,0x44,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13]
+
+v_perm_b32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: [0xff,0x00,0x44,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30]
+
+v_sad_hi_u8_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x23,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
+
+v_sad_hi_u8_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x23,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff]
+
+v_sad_hi_u8_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3]
+// GFX12: [0x05,0x00,0x23,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff]
+
+v_sad_hi_u8_e64_dpp v5, v1, v2, v3 row_mirror
+// GFX12: [0x05,0x00,0x23,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff]
+
+v_sad_hi_u8_e64_dpp v5, v1, v2, v255 row_half_mirror
+// GFX12: [0x05,0x00,0x23,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff]
+
+v_sad_hi_u8_e64_dpp v5, v1, v2, s105 row_shl:1
+// GFX12: [0x05,0x00,0x23,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff]
+
+v_sad_hi_u8_e64_dpp v5, v1, v2, vcc_hi row_shl:15
+// GFX12: [0x05,0x00,0x23,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff]
+
+v_sad_hi_u8_e64_dpp v5, v1, v2, vcc_lo row_shr:1
+// GFX12: [0x05,0x00,0x23,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff]
+
+v_sad_hi_u8_e64_dpp v5, v1, v2, ttmp15 row_shr:15
+// GFX12: [0x05,0x00,0x23,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff]
+
+v_sad_hi_u8_e64_dpp v5, v1, v2, exec_hi row_ror:1
+// GFX12: [0x05,0x00,0x23,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff]
+
+v_sad_hi_u8_e64_dpp v5, v1, v2, exec_lo row_ror:15
+// GFX12: [0x05,0x00,0x23,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff]
+
+v_sad_hi_u8_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: [0x05,0x00,0x23,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff]
+
+v_sad_hi_u8_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: [0x05,0x00,0x23,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01]
+
+v_sad_hi_u8_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: [0x05,0x00,0x23,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13]
+
+v_sad_hi_u8_e64_dpp v255, v255, v255, src_scc clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: [0xff,0x80,0x23,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30]
+
+v_sad_u16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x24,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
+
+v_sad_u16_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x24,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff]
+
+v_sad_u16_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x24,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff]
+
+v_sad_u16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3]
+// GFX12: [0x05,0x00,0x24,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff]
+
+v_sad_u16_e64_dpp v5, v1, v2, v3 row_mirror
+// GFX12: [0x05,0x00,0x24,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff]
+
+v_sad_u16_e64_dpp v5, v1, v2, v255 row_half_mirror
+// GFX12: [0x05,0x00,0x24,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff]
+
+v_sad_u16_e64_dpp v5, v1, v2, s105 row_shl:1
+// GFX12: [0x05,0x00,0x24,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff]
+
+v_sad_u16_e64_dpp v5, v1, v2, vcc_hi row_shl:15
+// GFX12: [0x05,0x00,0x24,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff]
+
+v_sad_u16_e64_dpp v5, v1, v2, vcc_lo row_shr:1
+// GFX12: [0x05,0x00,0x24,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff]
+
+v_sad_u16_e64_dpp v5, v1, v2, ttmp15 row_shr:15
+// GFX12: [0x05,0x00,0x24,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff]
+
+v_sad_u16_e64_dpp v5, v1, v2, exec_hi row_ror:1
+// GFX12: [0x05,0x00,0x24,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff]
+
+v_sad_u16_e64_dpp v5, v1, v2, exec_lo row_ror:15
+// GFX12: [0x05,0x00,0x24,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff]
+
+v_sad_u16_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: [0x05,0x00,0x24,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff]
+
+v_sad_u16_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: [0x05,0x00,0x24,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01]
+
+v_sad_u16_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: [0x05,0x00,0x24,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13]
+
+v_sad_u16_e64_dpp v255, v255, v255, src_scc clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: [0xff,0x80,0x24,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30]
+
+v_sad_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x25,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
+
+v_sad_u32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x25,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff]
+
+v_sad_u32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x25,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff]
+
+v_sad_u32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3]
+// GFX12: [0x05,0x00,0x25,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff]
+
+v_sad_u32_e64_dpp v5, v1, v2, v3 row_mirror
+// GFX12: [0x05,0x00,0x25,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff]
+
+v_sad_u32_e64_dpp v5, v1, v2, v255 row_half_mirror
+// GFX12: [0x05,0x00,0x25,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff]
+
+v_sad_u32_e64_dpp v5, v1, v2, s105 row_shl:1
+// GFX12: [0x05,0x00,0x25,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff]
+
+v_sad_u32_e64_dpp v5, v1, v2, vcc_hi row_shl:15
+// GFX12: [0x05,0x00,0x25,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff]
+
+v_sad_u32_e64_dpp v5, v1, v2, vcc_lo row_shr:1
+// GFX12: [0x05,0x00,0x25,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff]
+
+v_sad_u32_e64_dpp v5, v1, v2, ttmp15 row_shr:15
+// GFX12: [0x05,0x00,0x25,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff]
+
+v_sad_u32_e64_dpp v5, v1, v2, exec_hi row_ror:1
+// GFX12: [0x05,0x00,0x25,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff]
+
+v_sad_u32_e64_dpp v5, v1, v2, exec_lo row_ror:15
+// GFX12: [0x05,0x00,0x25,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff]
+
+v_sad_u32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: [0x05,0x00,0x25,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff]
+
+v_sad_u32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: [0x05,0x00,0x25,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01]
+
+v_sad_u32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: [0x05,0x00,0x25,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13]
+
+v_sad_u32_e64_dpp v255, v255, v255, src_scc clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: [0xff,0x80,0x25,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30]
+
+v_sad_u8_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x22,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
+
+v_sad_u8_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x22,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff]
+
+v_sad_u8_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3]
+// GFX12: [0x05,0x00,0x22,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff]
+
+v_sad_u8_e64_dpp v5, v1, v2, v3 row_mirror
+// GFX12: [0x05,0x00,0x22,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff]
+
+v_sad_u8_e64_dpp v5, v1, v2, v255 row_half_mirror
+// GFX12: [0x05,0x00,0x22,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff]
+
+v_sad_u8_e64_dpp v5, v1, v2, s105 row_shl:1
+// GFX12: [0x05,0x00,0x22,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff]
+
+v_sad_u8_e64_dpp v5, v1, v2, vcc_hi row_shl:15
+// GFX12: [0x05,0x00,0x22,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff]
+
+v_sad_u8_e64_dpp v5, v1, v2, vcc_lo row_shr:1
+// GFX12: [0x05,0x00,0x22,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff]
+
+v_sad_u8_e64_dpp v5, v1, v2, ttmp15 row_shr:15
+// GFX12: [0x05,0x00,0x22,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff]
+
+v_sad_u8_e64_dpp v5, v1, v2, exec_hi row_ror:1
+// GFX12: [0x05,0x00,0x22,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff]
+
+v_sad_u8_e64_dpp v5, v1, v2, exec_lo row_ror:15
+// GFX12: [0x05,0x00,0x22,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff]
+
+v_sad_u8_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: [0x05,0x00,0x22,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff]
+
+v_sad_u8_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: [0x05,0x00,0x22,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01]
+
+v_sad_u8_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: [0x05,0x00,0x22,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13]
+
+v_sad_u8_e64_dpp v255, v255, v255, src_scc clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: [0xff,0x80,0x22,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30]
+
+v_sub_co_u32_e64_dpp v5, s6, v1, v2 quad_perm:[3,2,1,0]
+// W32: [0x05,0x06,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_sub_co_u32_e64_dpp v5, s6, v1, v2 quad_perm:[0,1,2,3]
+// W32: [0x05,0x06,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_sub_co_u32_e64_dpp v5, s6, v1, v2 row_mirror
+// W32: [0x05,0x06,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_sub_co_u32_e64_dpp v5, s6, v1, s2 row_mirror
+// W32: [0x05,0x06,0x01,0xd7,0xfa,0x04,0x00,0x00,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_sub_co_u32_e64_dpp v5, s6, v1, v2 row_half_mirror
+// W32: [0x05,0x06,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_sub_co_u32_e64_dpp v5, s6, v1, v2 row_shl:1
+// W32: [0x05,0x06,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_sub_co_u32_e64_dpp v5, s6, v1, v2 row_shl:15
+// W32: [0x05,0x06,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_sub_co_u32_e64_dpp v5, s6, v1, v2 row_shr:1
+// W32: [0x05,0x06,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_sub_co_u32_e64_dpp v5, s6, v1, v2 row_shr:15
+// W32: [0x05,0x06,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_sub_co_u32_e64_dpp v5, s6, v1, v2 row_ror:1
+// W32: [0x05,0x06,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_sub_co_u32_e64_dpp v5, s105, v1, v2 row_ror:15
+// W32: [0x05,0x69,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_sub_co_u32_e64_dpp v5, vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W32: [0x05,0x6a,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_sub_co_u32_e64_dpp v5, vcc_hi, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: [0x05,0x6b,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_sub_co_u32_e64_dpp v5, ttmp15, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: [0x05,0x7b,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_sub_co_u32_e64_dpp v5, s[12:13], v1, v2 quad_perm:[3,2,1,0]
+// W64: [0x05,0x0c,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_sub_co_u32_e64_dpp v5, s[12:13], v1, v2 quad_perm:[0,1,2,3]
+// W64: [0x05,0x0c,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_sub_co_u32_e64_dpp v5, s[12:13], v1, v2 row_mirror
+// W64: [0x05,0x0c,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_sub_co_u32_e64_dpp v5, s[12:13], v1, v2 row_half_mirror
+// W64: [0x05,0x0c,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_sub_co_u32_e64_dpp v5, s[12:13], v1, s2 row_half_mirror
+// W64: [0x05,0x0c,0x01,0xd7,0xfa,0x04,0x00,0x00,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_sub_co_u32_e64_dpp v5, s[12:13], v1, v2 row_shl:1
+// W64: [0x05,0x0c,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_sub_co_u32_e64_dpp v5, s[12:13], v1, v2 row_shl:15
+// W64: [0x05,0x0c,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_sub_co_u32_e64_dpp v5, s[12:13], v1, v2 row_shr:1
+// W64: [0x05,0x0c,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_sub_co_u32_e64_dpp v5, s[12:13], v1, v2 row_shr:15
+// W64: [0x05,0x0c,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_sub_co_u32_e64_dpp v5, s[12:13], v1, v2 row_ror:1
+// W64: [0x05,0x0c,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_sub_co_u32_e64_dpp v5, s[12:13], v1, v2 row_ror:15
+// W64: [0x05,0x0c,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_sub_co_u32_e64_dpp v5, s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W64: [0x05,0x68,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_sub_co_u32_e64_dpp v5, vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: [0x05,0x6a,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_sub_co_u32_e64_dpp v5, ttmp[14:15], v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: [0x05,0x7a,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_sub_co_u32_e64_dpp v255, null, v255, v255 clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: [0xff,0xfc,0x01,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30]
+
+v_sub_nc_i16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+
+v_sub_nc_i16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3]
+// GFX12: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+
+v_sub_nc_i16_e64_dpp v5, v1, v2 row_mirror
+// GFX12: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+
+v_sub_nc_i16_e64_dpp v5, v1, v2 row_half_mirror
+// GFX12: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+
+v_sub_nc_i16_e64_dpp v5, v1, v2 row_shl:1
+// GFX12: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+
+v_sub_nc_i16_e64_dpp v5, v1, v2 row_shl:15
+// GFX12: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+
+v_sub_nc_i16_e64_dpp v5, v1, v2 row_shr:1
+// GFX12: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+
+v_sub_nc_i16_e64_dpp v5, v1, v2 row_shr:15
+// GFX12: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+
+v_sub_nc_i16_e64_dpp v5, v1, v2 row_ror:1
+// GFX12: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+
+v_sub_nc_i16_e64_dpp v5, v1, v2 row_ror:15
+// GFX12: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+
+v_sub_nc_i16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+
+v_sub_nc_i16_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+
+v_sub_nc_i16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
+
+v_sub_nc_i16_e64_dpp v255, v255, v255 clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: [0xff,0x80,0x0e,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30]
+
+v_sub_nc_i32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x25,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+
+v_sub_nc_i32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3]
+// GFX12: [0x05,0x00,0x25,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+
+v_sub_nc_i32_e64_dpp v5, v1, v2 row_mirror
+// GFX12: [0x05,0x00,0x25,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+
+v_sub_nc_i32_e64_dpp v5, v1, v2 row_half_mirror
+// GFX12: [0x05,0x00,0x25,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+
+v_sub_nc_i32_e64_dpp v5, v1, v2 row_shl:1
+// GFX12: [0x05,0x00,0x25,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+
+v_sub_nc_i32_e64_dpp v5, v1, v2 row_shl:15
+// GFX12: [0x05,0x00,0x25,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+
+v_sub_nc_i32_e64_dpp v5, v1, v2 row_shr:1
+// GFX12: [0x05,0x00,0x25,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+
+v_sub_nc_i32_e64_dpp v5, v1, v2 row_shr:15
+// GFX12: [0x05,0x00,0x25,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+
+v_sub_nc_i32_e64_dpp v5, v1, v2 row_ror:1
+// GFX12: [0x05,0x00,0x25,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+
+v_sub_nc_i32_e64_dpp v5, v1, v2 row_ror:15
+// GFX12: [0x05,0x00,0x25,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+
+v_sub_nc_i32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: [0x05,0x00,0x25,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+
+v_sub_nc_i32_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: [0x05,0x00,0x25,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+
+v_sub_nc_i32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: [0x05,0x00,0x25,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
+
+v_sub_nc_i32_e64_dpp v255, v255, v255 clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: [0xff,0x80,0x25,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30]
+
+v_sub_nc_u16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+
+v_sub_nc_u16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3]
+// GFX12: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+
+v_sub_nc_u16_e64_dpp v5, v1, v2 row_mirror
+// GFX12: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+
+v_sub_nc_u16_e64_dpp v5, v1, v2 row_half_mirror
+// GFX12: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+
+v_sub_nc_u16_e64_dpp v5, v1, v2 row_shl:1
+// GFX12: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+
+v_sub_nc_u16_e64_dpp v5, v1, v2 row_shl:15
+// GFX12: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+
+v_sub_nc_u16_e64_dpp v5, v1, v2 row_shr:1
+// GFX12: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+
+v_sub_nc_u16_e64_dpp v5, v1, v2 row_shr:15
+// GFX12: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+
+v_sub_nc_u16_e64_dpp v5, v1, v2 row_ror:1
+// GFX12: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+
+v_sub_nc_u16_e64_dpp v5, v1, v2 row_ror:15
+// GFX12: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+
+v_sub_nc_u16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+
+v_sub_nc_u16_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+
+v_sub_nc_u16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
+
+v_sub_nc_u16_e64_dpp v255, v255, v255 clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: [0xff,0x80,0x04,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30]
+
+v_subrev_co_u32_e64_dpp v5, s6, v1, v2 quad_perm:[3,2,1,0]
+// W32: [0x05,0x06,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_subrev_co_u32_e64_dpp v5, s6, v1, v2 quad_perm:[0,1,2,3]
+// W32: [0x05,0x06,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_subrev_co_u32_e64_dpp v5, s6, v1, v2 row_mirror
+// W32: [0x05,0x06,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_subrev_co_u32_e64_dpp v5, s6, v1, s2 row_mirror
+// W32: [0x05,0x06,0x02,0xd7,0xfa,0x04,0x00,0x00,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_subrev_co_u32_e64_dpp v5, s6, v1, v2 row_half_mirror
+// W32: [0x05,0x06,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_subrev_co_u32_e64_dpp v5, s6, v1, v2 row_shl:1
+// W32: [0x05,0x06,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_subrev_co_u32_e64_dpp v5, s6, v1, v2 row_shl:15
+// W32: [0x05,0x06,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_subrev_co_u32_e64_dpp v5, s6, v1, v2 row_shr:1
+// W32: [0x05,0x06,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_subrev_co_u32_e64_dpp v5, s6, v1, v2 row_shr:15
+// W32: [0x05,0x06,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_subrev_co_u32_e64_dpp v5, s6, v1, v2 row_ror:1
+// W32: [0x05,0x06,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_subrev_co_u32_e64_dpp v5, s105, v1, v2 row_ror:15
+// W32: [0x05,0x69,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_subrev_co_u32_e64_dpp v5, vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W32: [0x05,0x6a,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_subrev_co_u32_e64_dpp v5, vcc_hi, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: [0x05,0x6b,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_subrev_co_u32_e64_dpp v5, ttmp15, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: [0x05,0x7b,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_subrev_co_u32_e64_dpp v5, s[12:13], v1, v2 quad_perm:[3,2,1,0]
+// W64: [0x05,0x0c,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_subrev_co_u32_e64_dpp v5, s[12:13], v1, v2 quad_perm:[0,1,2,3]
+// W64: [0x05,0x0c,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_subrev_co_u32_e64_dpp v5, s[12:13], v1, v2 row_mirror
+// W64: [0x05,0x0c,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_subrev_co_u32_e64_dpp v5, s[12:13], v1, v2 row_half_mirror
+// W64: [0x05,0x0c,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_subrev_co_u32_e64_dpp v5, s[12:13], v1, s2 row_half_mirror
+// W64: [0x05,0x0c,0x02,0xd7,0xfa,0x04,0x00,0x00,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_subrev_co_u32_e64_dpp v5, s[12:13], v1, v2 row_shl:1
+// W64: [0x05,0x0c,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_subrev_co_u32_e64_dpp v5, s[12:13], v1, v2 row_shl:15
+// W64: [0x05,0x0c,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_subrev_co_u32_e64_dpp v5, s[12:13], v1, v2 row_shr:1
+// W64: [0x05,0x0c,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_subrev_co_u32_e64_dpp v5, s[12:13], v1, v2 row_shr:15
+// W64: [0x05,0x0c,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_subrev_co_u32_e64_dpp v5, s[12:13], v1, v2 row_ror:1
+// W64: [0x05,0x0c,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_subrev_co_u32_e64_dpp v5, s[12:13], v1, v2 row_ror:15
+// W64: [0x05,0x0c,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_subrev_co_u32_e64_dpp v5, s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W64: [0x05,0x68,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_subrev_co_u32_e64_dpp v5, vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: [0x05,0x6a,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_subrev_co_u32_e64_dpp v5, ttmp[14:15], v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: [0x05,0x7a,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_subrev_co_u32_e64_dpp v255, null, v255, v255 clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: [0xff,0xfc,0x02,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30]
+
+v_xad_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x45,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
+
+v_xad_u32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x45,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff]
+
+v_xad_u32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x45,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff]
+
+v_xad_u32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3]
+// GFX12: [0x05,0x00,0x45,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff]
+
+v_xad_u32_e64_dpp v5, v1, v2, v3 row_mirror
+// GFX12: [0x05,0x00,0x45,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff]
+
+v_xad_u32_e64_dpp v5, v1, v2, v255 row_half_mirror
+// GFX12: [0x05,0x00,0x45,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff]
+
+v_xad_u32_e64_dpp v5, v1, v2, s105 row_shl:1
+// GFX12: [0x05,0x00,0x45,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff]
+
+v_xad_u32_e64_dpp v5, v1, v2, vcc_hi row_shl:15
+// GFX12: [0x05,0x00,0x45,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff]
+
+v_xad_u32_e64_dpp v5, v1, v2, vcc_lo row_shr:1
+// GFX12: [0x05,0x00,0x45,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff]
+
+v_xad_u32_e64_dpp v5, v1, v2, ttmp15 row_shr:15
+// GFX12: [0x05,0x00,0x45,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff]
+
+v_xad_u32_e64_dpp v5, v1, v2, exec_hi row_ror:1
+// GFX12: [0x05,0x00,0x45,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff]
+
+v_xad_u32_e64_dpp v5, v1, v2, exec_lo row_ror:15
+// GFX12: [0x05,0x00,0x45,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff]
+
+v_xad_u32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: [0x05,0x00,0x45,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff]
+
+v_xad_u32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: [0x05,0x00,0x45,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01]
+
+v_xad_u32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: [0x05,0x00,0x45,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13]
+
+v_xad_u32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: [0xff,0x00,0x45,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30]
+
+v_xor3_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x40,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
+
+v_xor3_b32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x40,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff]
+
+v_xor3_b32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x40,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff]
+
+v_xor3_b32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3]
+// GFX12: [0x05,0x00,0x40,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff]
+
+v_xor3_b32_e64_dpp v5, v1, v2, v3 row_mirror
+// GFX12: [0x05,0x00,0x40,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff]
+
+v_xor3_b32_e64_dpp v5, v1, v2, v255 row_half_mirror
+// GFX12: [0x05,0x00,0x40,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff]
+
+v_xor3_b32_e64_dpp v5, v1, v2, s105 row_shl:1
+// GFX12: [0x05,0x00,0x40,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff]
+
+v_xor3_b32_e64_dpp v5, v1, v2, vcc_hi row_shl:15
+// GFX12: [0x05,0x00,0x40,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff]
+
+v_xor3_b32_e64_dpp v5, v1, v2, vcc_lo row_shr:1
+// GFX12: [0x05,0x00,0x40,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff]
+
+v_xor3_b32_e64_dpp v5, v1, v2, ttmp15 row_shr:15
+// GFX12: [0x05,0x00,0x40,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff]
+
+v_xor3_b32_e64_dpp v5, v1, v2, exec_hi row_ror:1
+// GFX12: [0x05,0x00,0x40,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff]
+
+v_xor3_b32_e64_dpp v5, v1, v2, exec_lo row_ror:15
+// GFX12: [0x05,0x00,0x40,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff]
+
+v_xor3_b32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: [0x05,0x00,0x40,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff]
+
+v_xor3_b32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: [0x05,0x00,0x40,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01]
+
+v_xor3_b32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: [0x05,0x00,0x40,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13]
+
+v_xor3_b32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: [0xff,0x00,0x40,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30]
+
+v_xor_b16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+
+v_xor_b16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3]
+// GFX12: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+
+v_xor_b16_e64_dpp v5, v1, v2 row_mirror
+// GFX12: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+
+v_xor_b16_e64_dpp v5, v1, v2 row_half_mirror
+// GFX12: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+
+v_xor_b16_e64_dpp v5, v1, v2 row_shl:1
+// GFX12: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+
+v_xor_b16_e64_dpp v5, v1, v2 row_shl:15
+// GFX12: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+
+v_xor_b16_e64_dpp v5, v1, v2 row_shr:1
+// GFX12: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+
+v_xor_b16_e64_dpp v5, v1, v2 row_shr:15
+// GFX12: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+
+v_xor_b16_e64_dpp v5, v1, v2 row_ror:1
+// GFX12: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+
+v_xor_b16_e64_dpp v5, v1, v2 row_ror:15
+// GFX12: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+
+v_xor_b16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+
+v_xor_b16_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+
+v_xor_b16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13]
+
+v_xor_b16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: [0xff,0x00,0x64,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30]
+
+v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: [0x05,0x58,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+
+v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: [0x05,0x08,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+
+v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3
+// GFX12: [0x05,0x10,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
+
+v_add_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1
+// GFX12: [0xff,0xc0,0x0d,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
+
+v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: [0x05,0x58,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+
+v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: [0x05,0x08,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+
+v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3
+// GFX12: [0x05,0x10,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
+
+v_add_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1
+// GFX12: [0xff,0xc0,0x03,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
+
+v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3
+// GFX12: [0x05,0x0a,0x12,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+
+v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1
+// GFX12: [0xff,0x13,0x12,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+
+v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3
+// GFX12: [0x05,0x0a,0x13,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+
+v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1
+// GFX12: [0xff,0x13,0x13,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+
+v_div_fixup_f16_e64_dpp v5, -v1, v2, |exec_lo| op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf
+// GFX12: [0x05,0x7c,0x54,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff]
+
+v_div_fixup_f16_e64_dpp v5, -|v1|, -|v2|, null op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: [0x05,0x0b,0x54,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff]
+
+v_div_fixup_f16_e64_dpp v5, -|v1|, v2, -|-1| op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: [0x05,0x15,0x54,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01]
+
+v_div_fixup_f16_e64_dpp v5, v1, -|v2|, -|0.5| op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3
+// GFX12: [0x05,0x26,0x54,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x01,0x13]
+
+v_div_fixup_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1
+// GFX12: [0xff,0xc7,0x54,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30]
+
+v_fma_f16_e64_dpp v5, -v1, v2, |exec_lo| op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf
+// GFX12: [0x05,0x7c,0x48,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff]
+
+v_fma_f16_e64_dpp v5, -|v1|, -|v2|, null op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: [0x05,0x0b,0x48,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff]
+
+v_fma_f16_e64_dpp v5, -|v1|, v2, -|-1| op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: [0x05,0x15,0x48,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01]
+
+v_fma_f16_e64_dpp v5, v1, -|v2|, -|0.5| op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3
+// GFX12: [0x05,0x26,0x48,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x01,0x13]
+
+v_fma_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1
+// GFX12: [0xff,0xc7,0x48,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30]
+
+v_mad_i16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf
+// GFX12: [0x05,0x78,0x53,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff]
+
+v_mad_i16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: [0x05,0x08,0x53,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff]
+
+v_mad_i16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: [0x05,0x10,0x53,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01]
+
+v_mad_i16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3
+// GFX12: [0x05,0x20,0x53,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13]
+
+v_mad_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1
+// GFX12: [0xff,0xc0,0x53,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30]
+
+v_mad_i32_i16_e64_dpp v5, v1, v2, 0.5 op_sel:[1,0,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3
+// GFX12: [0x05,0x08,0x5a,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13]
+
+v_mad_i32_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,1,0,0] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1
+// GFX12: [0xff,0x90,0x5a,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30]
+
+v_mad_u16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf
+// GFX12: [0x05,0x78,0x41,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff]
+
+v_mad_u16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: [0x05,0x08,0x41,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff]
+
+v_mad_u16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: [0x05,0x10,0x41,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01]
+
+v_mad_u16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3
+// GFX12: [0x05,0x20,0x41,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13]
+
+v_mad_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1
+// GFX12: [0xff,0xc0,0x41,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30]
+
+v_mad_u32_u16_e64_dpp v5, v1, v2, 0.5 op_sel:[1,0,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3
+// GFX12: [0x05,0x08,0x59,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13]
+
+v_mad_u32_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,1,0,0] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1
+// GFX12: [0xff,0x90,0x59,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30]
+
+v_max3_num_f16_e64_dpp v5, -v1, v2, |exec_lo| op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf
+// GFX12: [0x05,0x7c,0x2c,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff]
+
+v_max3_num_f16_e64_dpp v5, -|v1|, -|v2|, null op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: [0x05,0x0b,0x2c,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff]
+
+v_max3_num_f16_e64_dpp v5, -|v1|, v2, -|-1| op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: [0x05,0x15,0x2c,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01]
+
+v_max3_num_f16_e64_dpp v5, v1, -|v2|, -|0.5| op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3
+// GFX12: [0x05,0x26,0x2c,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x01,0x13]
+
+v_max3_num_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1
+// GFX12: [0xff,0xc7,0x2c,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30]
+
+v_max3_i16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf
+// GFX12: [0x05,0x78,0x4d,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff]
+
+v_max3_i16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: [0x05,0x08,0x4d,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff]
+
+v_max3_i16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: [0x05,0x10,0x4d,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01]
+
+v_max3_i16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3
+// GFX12: [0x05,0x20,0x4d,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13]
+
+v_max3_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1
+// GFX12: [0xff,0x40,0x4d,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30]
+
+v_max3_u16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf
+// GFX12: [0x05,0x78,0x4e,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff]
+
+v_max3_u16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: [0x05,0x08,0x4e,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff]
+
+v_max3_u16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: [0x05,0x10,0x4e,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01]
+
+v_max3_u16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3
+// GFX12: [0x05,0x20,0x4e,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13]
+
+v_max3_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1
+// GFX12: [0xff,0x40,0x4e,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30]
+
+v_med3_num_f16_e64_dpp v5, -v1, v2, |exec_lo| op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf
+// GFX12: [0x05,0x7c,0x32,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff]
+
+v_med3_num_f16_e64_dpp v5, -|v1|, -|v2|, null op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: [0x05,0x0b,0x32,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff]
+
+v_med3_num_f16_e64_dpp v5, -|v1|, v2, -|-1| op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: [0x05,0x15,0x32,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01]
+
+v_med3_num_f16_e64_dpp v5, v1, -|v2|, -|0.5| op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3
+// GFX12: [0x05,0x26,0x32,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x01,0x13]
+
+v_med3_num_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1
+// GFX12: [0xff,0xc7,0x32,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30]
+
+v_med3_i16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf
+// GFX12: [0x05,0x78,0x50,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff]
+
+v_med3_i16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: [0x05,0x08,0x50,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff]
+
+v_med3_i16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: [0x05,0x10,0x50,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01]
+
+v_med3_i16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3
+// GFX12: [0x05,0x20,0x50,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13]
+
+v_med3_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1
+// GFX12: [0xff,0x40,0x50,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30]
+
+v_med3_u16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf
+// GFX12: [0x05,0x78,0x51,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff]
+
+v_med3_u16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: [0x05,0x08,0x51,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff]
+
+v_med3_u16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: [0x05,0x10,0x51,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01]
+
+v_med3_u16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3
+// GFX12: [0x05,0x20,0x51,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13]
+
+v_med3_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1
+// GFX12: [0xff,0x40,0x51,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30]
+
+v_min3_num_f16_e64_dpp v5, -v1, v2, |exec_lo| op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf
+// GFX12: [0x05,0x7c,0x2b,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff]
+
+v_min3_num_f16_e64_dpp v5, -|v1|, -|v2|, null op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: [0x05,0x0b,0x2b,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff]
+
+v_min3_num_f16_e64_dpp v5, -|v1|, v2, -|-1| op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: [0x05,0x15,0x2b,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01]
+
+v_min3_num_f16_e64_dpp v5, v1, -|v2|, -|0.5| op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3
+// GFX12: [0x05,0x26,0x2b,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x01,0x13]
+
+v_min3_num_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1
+// GFX12: [0xff,0xc7,0x2b,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30]
+
+v_min3_i16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf
+// GFX12: [0x05,0x78,0x4a,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff]
+
+v_min3_i16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: [0x05,0x08,0x4a,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff]
+
+v_min3_i16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: [0x05,0x10,0x4a,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01]
+
+v_min3_i16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3
+// GFX12: [0x05,0x20,0x4a,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13]
+
+v_min3_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1
+// GFX12: [0xff,0x40,0x4a,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30]
+
+v_min3_u16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf
+// GFX12: [0x05,0x78,0x4b,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff]
+
+v_min3_u16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: [0x05,0x08,0x4b,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff]
+
+v_min3_u16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: [0x05,0x10,0x4b,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01]
+
+v_min3_u16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3
+// GFX12: [0x05,0x20,0x4b,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13]
+
+v_min3_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1
+// GFX12: [0xff,0x40,0x4b,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30]
+
+v_pack_b32_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3
+// GFX12: [0x05,0x0a,0x11,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+
+v_pack_b32_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1
+// GFX12: [0xff,0x13,0x11,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+
+v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: [0x05,0x58,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+
+v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: [0x05,0x08,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+
+v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3
+// GFX12: [0x05,0x10,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
+
+v_sub_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1
+// GFX12: [0xff,0xc0,0x0e,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
+
+v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: [0x05,0x58,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+
+v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: [0x05,0x08,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01]
+
+v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3
+// GFX12: [0x05,0x10,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13]
+
+v_sub_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1
+// GFX12: [0xff,0xc0,0x04,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
+
+v_dot2_f16_f16_e64_dpp v0, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1
+// GFX12: [0x00,0x00,0x66,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x04,0x00]
+
+v_dot2_f16_f16_e64_dpp v0, v1, v2, v3 op_sel:[1,1,0,0] quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1
+// GFX12-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid op_sel operand
+
+v_dot2_f16_f16_e64_dpp v0, s1, v2, v3 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1
+// GFX12-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_dot2_f16_f16_e64_dpp v0, v1, s2, v3 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1
+// GFX12: [0x00,0x00,0x66,0xd6,0xfa,0x04,0x0c,0x04,0x01,0xe4,0x04,0x00]
+
+v_dot2_f16_f16_e64_dpp v0, v1, v2, v3 op_sel:[0,0,1,1] quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1
+// GFX12: [0x00,0x60,0x66,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x04,0x00]
+
+v_dot2_f16_f16_e64_dpp v0, |v1|, -v2, -|s3| op_sel:[0,0,1,1] quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1
+// GFX12: [0x00,0x65,0x66,0xd6,0xfa,0x04,0x0e,0xc0,0x01,0xe4,0x04,0x00]
+
+v_dot2_f16_f16_e64_dpp v5, v1, v2, 0.5 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
+// GFX12: [0x05,0x00,0x66,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x1b,0x00,0xff]
+
+v_dot2_bf16_bf16_e64_dpp v0, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1
+// GFX12: [0x00,0x00,0x67,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x04,0x00]
+
+v_dot2_bf16_bf16_e64_dpp v0, v1, v2, v3 op_sel:[1,1,0,0] quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1
+// GFX12-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid op_sel operand
+
+v_dot2_bf16_bf16_e64_dpp v0, s1, v2, v3 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
+// GFX12-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_dot2_bf16_bf16_e64_dpp v0, v1, s2, v3 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
+// GFX12: [0x00,0x00,0x67,0xd6,0xfa,0x04,0x0c,0x04,0x01,0xe4,0x00,0x00]
+
+v_dot2_bf16_bf16_e64_dpp v0, v1, v2, v3 op_sel:[0,0,1,1] quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1
+// GFX12: [0x00,0x60,0x67,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x04,0x00]
+
+v_dot2_bf16_bf16_e64_dpp v0, |v1|, -v2, -|s3| op_sel:[0,0,1,1] quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1
+// GFX12: [0x00,0x65,0x67,0xd6,0xfa,0x04,0x0e,0xc0,0x01,0xe4,0x04,0x00]
+
+v_dot2_bf16_bf16_e64_dpp v5, v1, v2, 0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
+// GFX12: [0x05,0x00,0x67,0xd6,0xfa,0x04,0x02,0x02,0x01,0x1b,0x00,0xff]
+
+v_minimum_f32 v5, v1, v2 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x65,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+
+v_minimum_f32 v5, v1, v2 quad_perm:[0,1,2,3]
+// GFX12: [0x05,0x00,0x65,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+
+v_minimum_f32 v5, v1, v2 row_mirror
+// GFX12: [0x05,0x00,0x65,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+
+v_minimum_f32 v5, v1, v2 row_half_mirror
+// GFX12: [0x05,0x00,0x65,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+
+v_minimum_f32 v5, v1, v2 row_shl:1
+// GFX12: [0x05,0x00,0x65,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+
+v_minimum_f32 v5, v1, v2 row_shl:15
+// GFX12: [0x05,0x00,0x65,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+
+v_minimum_f32 v5, v1, v2 row_shr:1
+// GFX12: [0x05,0x00,0x65,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+
+v_minimum_f32 v5, v1, v2 row_shr:15
+// GFX12: [0x05,0x00,0x65,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+
+v_minimum_f32 v5, v1, v2 row_ror:1
+// GFX12: [0x05,0x00,0x65,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+
+v_minimum_f32 v5, v1, v2 row_ror:15
+// GFX12: [0x05,0x00,0x65,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+
+v_minimum_f32 v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: [0x05,0x00,0x65,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+
+v_minimum_f32 v5, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: [0x05,0x01,0x65,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+
+v_minimum_f32 v5, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: [0x05,0x02,0x65,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+
+v_minimum_f32 v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: [0xff,0x03,0x65,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
+
+v_maximum_f32 v5, v1, v2 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x66,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+
+v_maximum_f32 v5, v1, v2 quad_perm:[0,1,2,3]
+// GFX12: [0x05,0x00,0x66,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+
+v_maximum_f32 v5, v1, v2 row_mirror
+// GFX12: [0x05,0x00,0x66,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+
+v_maximum_f32 v5, v1, v2 row_half_mirror
+// GFX12: [0x05,0x00,0x66,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+
+v_maximum_f32 v5, v1, v2 row_shl:1
+// GFX12: [0x05,0x00,0x66,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+
+v_maximum_f32 v5, v1, v2 row_shl:15
+// GFX12: [0x05,0x00,0x66,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+
+v_maximum_f32 v5, v1, v2 row_shr:1
+// GFX12: [0x05,0x00,0x66,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+
+v_maximum_f32 v5, v1, v2 row_shr:15
+// GFX12: [0x05,0x00,0x66,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+
+v_maximum_f32 v5, v1, v2 row_ror:1
+// GFX12: [0x05,0x00,0x66,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+
+v_maximum_f32 v5, v1, v2 row_ror:15
+// GFX12: [0x05,0x00,0x66,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+
+v_maximum_f32 v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: [0x05,0x00,0x66,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+
+v_maximum_f32 v5, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: [0x05,0x01,0x66,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+
+v_maximum_f32 v5, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: [0x05,0x02,0x66,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+
+v_maximum_f32 v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: [0xff,0x03,0x66,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
+
+v_minimum_f16 v5, v1, v2 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+
+v_minimum_f16 v5, v1, v2 quad_perm:[0,1,2,3]
+// GFX12: [0x05,0x00,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+
+v_minimum_f16 v5, v1, v2 row_mirror
+// GFX12: [0x05,0x00,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+
+v_minimum_f16 v5, v1, v2 row_half_mirror
+// GFX12: [0x05,0x00,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+
+v_minimum_f16 v5, v1, v2 row_shl:1
+// GFX12: [0x05,0x00,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+
+v_minimum_f16 v5, v1, v2 row_shl:15
+// GFX12: [0x05,0x00,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+
+v_minimum_f16 v5, v1, v2 row_shr:1
+// GFX12: [0x05,0x00,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+
+v_minimum_f16 v5, v1, v2 row_shr:15
+// GFX12: [0x05,0x00,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+
+v_minimum_f16 v5, v1, v2 row_ror:1
+// GFX12: [0x05,0x00,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+
+v_minimum_f16 v5, v1, v2 row_ror:15
+// GFX12: [0x05,0x00,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+
+v_minimum_f16 v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: [0x05,0x00,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+
+v_minimum_f16 v5, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: [0x05,0x01,0x67,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+
+v_minimum_f16 v5, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: [0x05,0x02,0x67,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+
+v_minimum_f16 v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: [0xff,0x03,0x67,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
+
+v_maximum_f16 v5, v1, v2 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+
+v_maximum_f16 v5, v1, v2 quad_perm:[0,1,2,3]
+// GFX12: [0x05,0x00,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+
+v_maximum_f16 v5, v1, v2 row_mirror
+// GFX12: [0x05,0x00,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+
+v_maximum_f16 v5, v1, v2 row_half_mirror
+// GFX12: [0x05,0x00,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+
+v_maximum_f16 v5, v1, v2 row_shl:1
+// GFX12: [0x05,0x00,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+
+v_maximum_f16 v5, v1, v2 row_shl:15
+// GFX12: [0x05,0x00,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+
+v_maximum_f16 v5, v1, v2 row_shr:1
+// GFX12: [0x05,0x00,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+
+v_maximum_f16 v5, v1, v2 row_shr:15
+// GFX12: [0x05,0x00,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+
+v_maximum_f16 v5, v1, v2 row_ror:1
+// GFX12: [0x05,0x00,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+
+v_maximum_f16 v5, v1, v2 row_ror:15
+// GFX12: [0x05,0x00,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+
+v_maximum_f16 v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: [0x05,0x00,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+
+v_maximum_f16 v5, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: [0x05,0x01,0x68,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+
+v_maximum_f16 v5, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: [0x05,0x02,0x68,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+
+v_maximum_f16 v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: [0xff,0x03,0x68,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
+
+v_minimum3_f32 v5, v1, v2, v3 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x2d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
+
+v_minimum3_f32 v5, v1, s2, v3 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x2d,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff]
+
+v_minimum3_f32 v5, v1, 2.0, v3 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x2d,0xd6,0xfa,0xe8,0x0d,0x04,0x01,0x1b,0x00,0xff]
+
+v_minimum3_f32 v5, v1, v2, v3 quad_perm:[0,1,2,3]
+// GFX12: [0x05,0x00,0x2d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff]
+
+v_minimum3_f32 v5, v1, v2, v3 row_mirror
+// GFX12: [0x05,0x00,0x2d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff]
+
+v_minimum3_f32 v5, v1, v2, v255 row_half_mirror
+// GFX12: [0x05,0x00,0x2d,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff]
+
+v_minimum3_f32 v5, v1, v2, s105 row_shl:1
+// GFX12: [0x05,0x00,0x2d,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff]
+
+v_minimum3_f32 v5, v1, v2, vcc_hi row_shl:15
+// GFX12: [0x05,0x00,0x2d,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff]
+
+v_minimum3_f32 v5, v1, v2, vcc_lo row_shr:1
+// GFX12: [0x05,0x00,0x2d,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff]
+
+v_minimum3_f32 v5, |v1|, v2, -ttmp15 row_shr:15
+// GFX12: [0x05,0x01,0x2d,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff]
+
+v_minimum3_f32 v5, v1, -|v2|, exec_hi row_ror:1
+// GFX12: [0x05,0x02,0x2d,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff]
+
+v_minimum3_f32 v5, -v1, v2, |exec_lo| row_ror:15
+// GFX12: [0x05,0x04,0x2d,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff]
+
+v_minimum3_f32 v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: [0x05,0x03,0x2d,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff]
+
+v_minimum3_f32 v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: [0x05,0x05,0x2d,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01]
+
+v_minimum3_f32 v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: [0x05,0x06,0x2d,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x09,0x13]
+
+v_minimum3_f32 v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: [0xff,0x87,0x2d,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x05,0x30]
+
+v_maximum3_f32 v5, v1, v2, v3 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x2e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
+
+v_maximum3_f32 v5, v1, s2, v3 quad_perm:[3,2,1,0]
+// GFX12: [0x05,0x00,0x2e,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff]
+
+v_maximum3_f32
v5, v1, 2.0, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x2e,0xd6,0xfa,0xe8,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_maximum3_f32 v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX12: [0x05,0x00,0x2e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_maximum3_f32 v5, v1, v2, v3 row_mirror +// GFX12: [0x05,0x00,0x2e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_maximum3_f32 v5, v1, v2, v255 row_half_mirror +// GFX12: [0x05,0x00,0x2e,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] + +v_maximum3_f32 v5, v1, v2, s105 row_shl:1 +// GFX12: [0x05,0x00,0x2e,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] + +v_maximum3_f32 v5, v1, v2, vcc_hi row_shl:15 +// GFX12: [0x05,0x00,0x2e,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] + +v_maximum3_f32 v5, v1, v2, vcc_lo row_shr:1 +// GFX12: [0x05,0x00,0x2e,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] + +v_maximum3_f32 v5, |v1|, v2, -ttmp15 row_shr:15 +// GFX12: [0x05,0x01,0x2e,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] + +v_maximum3_f32 v5, v1, -|v2|, exec_hi row_ror:1 +// GFX12: [0x05,0x02,0x2e,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] + +v_maximum3_f32 v5, -v1, v2, |exec_lo| row_ror:15 +// GFX12: [0x05,0x04,0x2e,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] + +v_maximum3_f32 v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: [0x05,0x03,0x2e,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] + +v_maximum3_f32 v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: [0x05,0x05,0x2e,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01] + +v_maximum3_f32 v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: [0x05,0x06,0x2e,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x09,0x13] + +v_maximum3_f32 v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: [0xff,0x87,0x2e,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x05,0x30] + +v_minimum3_f16 v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x2f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_minimum3_f16 v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x2f,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_minimum3_f16 v5, v1, 2.0, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x2f,0xd6,0xfa,0xe8,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_minimum3_f16 v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX12: [0x05,0x00,0x2f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_minimum3_f16 v5, v1, v2, v3 row_mirror +// GFX12: [0x05,0x00,0x2f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_minimum3_f16 v5, v1, v2, v255 row_half_mirror +// GFX12: [0x05,0x00,0x2f,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] + +v_minimum3_f16 v5, v1, v2, s105 row_shl:1 +// GFX12: [0x05,0x00,0x2f,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] + +v_minimum3_f16 v5, v1, v2, vcc_hi row_shl:15 +// GFX12: [0x05,0x00,0x2f,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] + +v_minimum3_f16 v5, v1, v2, vcc_lo row_shr:1 +// GFX12: [0x05,0x00,0x2f,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] + +v_minimum3_f16 v5, |v1|, v2, -ttmp15 row_shr:15 +// GFX12: [0x05,0x01,0x2f,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] + +v_minimum3_f16 v5, v1, -|v2|, exec_hi row_ror:1 +// GFX12: [0x05,0x02,0x2f,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] + +v_minimum3_f16 v5, -v1, v2, |exec_lo| row_ror:15 +// GFX12: [0x05,0x04,0x2f,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] + +v_minimum3_f16 v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: [0x05,0x03,0x2f,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] + +v_minimum3_f16 v5, -|v1|, v2, 
-|-1| row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: [0x05,0x05,0x2f,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01] + +v_minimum3_f16 v5, v1, -|v2|, -|0.5| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: [0x05,0x06,0x2f,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x09,0x13] + +v_minimum3_f16 v255, -|v255|, -|v255|, -|src_scc| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: [0xff,0x87,0x2f,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x05,0x30] + +v_maximum3_f16 v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x30,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_maximum3_f16 v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x30,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_maximum3_f16 v5, v1, 2.0, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x30,0xd6,0xfa,0xe8,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_maximum3_f16 v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX12: [0x05,0x00,0x30,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_maximum3_f16 v5, v1, v2, v3 row_mirror +// GFX12: [0x05,0x00,0x30,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_maximum3_f16 v5, v1, v2, v255 row_half_mirror +// GFX12: [0x05,0x00,0x30,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] + +v_maximum3_f16 v5, v1, v2, s105 row_shl:1 +// GFX12: [0x05,0x00,0x30,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] + +v_maximum3_f16 v5, v1, v2, vcc_hi row_shl:15 +// GFX12: [0x05,0x00,0x30,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] + +v_maximum3_f16 v5, v1, v2, vcc_lo row_shr:1 +// GFX12: [0x05,0x00,0x30,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] + +v_maximum3_f16 v5, |v1|, v2, -ttmp15 row_shr:15 +// GFX12: [0x05,0x01,0x30,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] + +v_maximum3_f16 v5, v1, -|v2|, exec_hi row_ror:1 +// GFX12: [0x05,0x02,0x30,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] + +v_maximum3_f16 v5, -v1, v2, |exec_lo| row_ror:15 +// GFX12: [0x05,0x04,0x30,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] + +v_maximum3_f16 v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: [0x05,0x03,0x30,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] + +v_maximum3_f16 v5, -|v1|, v2, -|-1| row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: [0x05,0x05,0x30,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01] + +v_maximum3_f16 v5, v1, -|v2|, -|0.5| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: [0x05,0x06,0x30,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x09,0x13] + +v_maximum3_f16 v255, -|v255|, -|v255|, -|src_scc| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: [0xff,0x87,0x30,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x05,0x30] + +v_maximumminimum_f32 v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX12: [0x05,0x00,0x6d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_maximumminimum_f32 v5, v1, v2, v3 row_mirror +// GFX12: [0x05,0x00,0x6d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_maximumminimum_f32 v5, v1, v2, v255 row_half_mirror +// GFX12: [0x05,0x00,0x6d,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] + +v_maximumminimum_f32 v5, v1, v2, s105 row_shl:1 +// GFX12: [0x05,0x00,0x6d,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] + +v_maximumminimum_f32 v5, v1, v2, vcc_hi row_shl:15 +// GFX12: [0x05,0x00,0x6d,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] + +v_maximumminimum_f32 v5, v1, v2, vcc_lo row_shr:1 +// GFX12: [0x05,0x00,0x6d,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] + +v_maximumminimum_f32 v5, |v1|, v2, -ttmp15 row_shr:15 +// GFX12: [0x05,0x01,0x6d,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] + +v_maximumminimum_f32 v5, v1, -|v2|, 
exec_hi row_ror:1 +// GFX12: [0x05,0x02,0x6d,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] + +v_maximumminimum_f32 v5, -v1, v2, |exec_lo| row_ror:15 +// GFX12: [0x05,0x04,0x6d,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] + +v_maximumminimum_f32 v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: [0x05,0x03,0x6d,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] + +v_maximumminimum_f32 v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: [0x05,0x05,0x6d,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01] + +v_maximumminimum_f32 v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: [0x05,0x06,0x6d,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x09,0x13] + +v_maximumminimum_f32 v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: [0xff,0x87,0x6d,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x05,0x30] + +v_minimummaximum_f32 v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x6c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_minimummaximum_f32 v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x6c,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_minimummaximum_f32 v5, v1, 2.0, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x6c,0xd6,0xfa,0xe8,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_minimummaximum_f32 v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX12: [0x05,0x00,0x6c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_minimummaximum_f32 v5, v1, v2, v3 row_mirror +// GFX12: [0x05,0x00,0x6c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_minimummaximum_f32 v5, v1, v2, v255 row_half_mirror +// GFX12: [0x05,0x00,0x6c,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] + +v_minimummaximum_f32 v5, v1, v2, s105 row_shl:1 +// GFX12: [0x05,0x00,0x6c,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] + +v_minimummaximum_f32 v5, v1, v2, vcc_hi row_shl:15 +// GFX12: [0x05,0x00,0x6c,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] + +v_minimummaximum_f32 v5, v1, v2, vcc_lo row_shr:1 +// GFX12: [0x05,0x00,0x6c,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] + +v_minimummaximum_f32 v5, |v1|, v2, -ttmp15 row_shr:15 +// GFX12: [0x05,0x01,0x6c,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] + +v_minimummaximum_f32 v5, v1, -|v2|, exec_hi row_ror:1 +// GFX12: [0x05,0x02,0x6c,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] + +v_minimummaximum_f32 v5, -v1, v2, |exec_lo| row_ror:15 +// GFX12: [0x05,0x04,0x6c,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] + +v_minimummaximum_f32 v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: [0x05,0x03,0x6c,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] + +v_minimummaximum_f32 v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: [0x05,0x05,0x6c,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01] + +v_minimummaximum_f32 v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: [0x05,0x06,0x6c,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x09,0x13] + +v_minimummaximum_f32 v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: [0xff,0x87,0x6c,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x05,0x30] + +v_maximumminimum_f16 v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x6f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_maximumminimum_f16 v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x6f,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_maximumminimum_f16 v5, v1, 2.0, v3 quad_perm:[3,2,1,0] +// GFX12: 
[0x05,0x00,0x6f,0xd6,0xfa,0xe8,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_maximumminimum_f16 v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX12: [0x05,0x00,0x6f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_maximumminimum_f16 v5, v1, v2, v3 row_mirror +// GFX12: [0x05,0x00,0x6f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_maximumminimum_f16 v5, v1, v2, v255 row_half_mirror +// GFX12: [0x05,0x00,0x6f,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] + +v_maximumminimum_f16 v5, v1, v2, s105 row_shl:1 +// GFX12: [0x05,0x00,0x6f,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] + +v_maximumminimum_f16 v5, v1, v2, vcc_hi row_shl:15 +// GFX12: [0x05,0x00,0x6f,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] + +v_maximumminimum_f16 v5, v1, v2, vcc_lo row_shr:1 +// GFX12: [0x05,0x00,0x6f,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] + +v_maximumminimum_f16 v5, |v1|, v2, -ttmp15 row_shr:15 +// GFX12: [0x05,0x01,0x6f,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] + +v_maximumminimum_f16 v5, v1, -|v2|, exec_hi row_ror:1 +// GFX12: [0x05,0x02,0x6f,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] + +v_maximumminimum_f16 v5, -v1, v2, |exec_lo| row_ror:15 +// GFX12: [0x05,0x04,0x6f,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] + +v_maximumminimum_f16 v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: [0x05,0x03,0x6f,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] + +v_maximumminimum_f16 v5, -|v1|, v2, -|-1| row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: [0x05,0x05,0x6f,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01] + +v_maximumminimum_f16 v5, v1, -|v2|, -|0.5| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: [0x05,0x06,0x6f,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x09,0x13] + +v_maximumminimum_f16 v255, -|v255|, -|v255|, -|src_scc| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: [0xff,0x87,0x6f,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x05,0x30] + +v_minimummaximum_f16 v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x6e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_minimummaximum_f16 v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x6e,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_minimummaximum_f16 v5, v1, 2.0, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x6e,0xd6,0xfa,0xe8,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_minimummaximum_f16 v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX12: [0x05,0x00,0x6e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_minimummaximum_f16 v5, v1, v2, v3 row_mirror +// GFX12: [0x05,0x00,0x6e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_minimummaximum_f16 v5, v1, v2, v255 row_half_mirror +// GFX12: [0x05,0x00,0x6e,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] + +v_minimummaximum_f16 v5, v1, v2, s105 row_shl:1 +// GFX12: [0x05,0x00,0x6e,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] + +v_minimummaximum_f16 v5, v1, v2, vcc_hi row_shl:15 +// GFX12: [0x05,0x00,0x6e,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] + +v_minimummaximum_f16 v5, v1, v2, vcc_lo row_shr:1 +// GFX12: [0x05,0x00,0x6e,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] + +v_minimummaximum_f16 v5, |v1|, v2, -ttmp15 row_shr:15 +// GFX12: [0x05,0x01,0x6e,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] + +v_minimummaximum_f16 v5, v1, -|v2|, exec_hi row_ror:1 +// GFX12: [0x05,0x02,0x6e,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] + +v_minimummaximum_f16 v5, -v1, v2, |exec_lo| row_ror:15 +// GFX12: [0x05,0x04,0x6e,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] + +v_minimummaximum_f16 v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: 
+
+v_minimummaximum_f16 v5, -|v1|, v2, -|-1| row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: [0x05,0x05,0x6e,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01]
+
+v_minimummaximum_f16 v5, v1, -|v2|, -|0.5| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: [0x05,0x06,0x6e,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x09,0x13]
+
+v_minimummaximum_f16 v255, -|v255|, -|v255|, -|src_scc| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: [0xff,0x87,0x6e,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x05,0x30]
diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp16.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp16.s
index 91817b9029db3..14b489efc8d19 100644
--- a/llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp16.s
+++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp16.s
@@ -1,7 +1,7 @@
-// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32 -show-encoding %s | FileCheck --check-prefixes=GFX12,W32 %s
-// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64 -show-encoding %s | FileCheck --check-prefixes=GFX12,W64 %s
-// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32 %s 2>&1 | FileCheck --check-prefixes=GFX12-ERR,W32-ERR --implicit-check-not=error: %s
-// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64 %s 2>&1 | FileCheck --check-prefixes=GFX12-ERR,W64-ERR --implicit-check-not=error: %s
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,+real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX12,W32 %s
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,+real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX12,W64 %s
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,+real-true16 %s 2>&1 | FileCheck --check-prefixes=GFX12-ERR,W32-ERR --implicit-check-not=error: %s
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,+real-true16 %s 2>&1 | FileCheck --check-prefixes=GFX12-ERR,W64-ERR --implicit-check-not=error: %s
 
 v_add3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0]
 // GFX12: [0x05,0x00,0x55,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp8-fake16.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp8-fake16.s
new file mode 100644
index 0000000000000..4622797357301
--- /dev/null
+++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp8-fake16.s
@@ -0,0 +1,3814 @@
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,-real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX12,W32 %s
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,-real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX12,W64 %s
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,-real-true16 %s 2>&1 | FileCheck --check-prefixes=GFX12-ERR,W32-ERR --implicit-check-not=error: %s
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,-real-true16 %s 2>&1 | FileCheck --check-prefixes=GFX12-ERR,W64-ERR --implicit-check-not=error: %s
+
+v_add3_u32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x55,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05]
+
+v_add3_u32_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x55,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05]
+
+v_add3_u32_e64_dpp v5, v1, 10, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x55,0xd6,0xe9,0x14,0x0d,0x04,0x01,0x77,0x39,0x05]
+
+v_add3_u32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x55,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05]
+
+v_add3_u32_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x55,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05]
+
+v_add3_u32_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x55,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05]
+
+v_add3_u32_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x55,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05]
+
+v_add3_u32_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x55,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05]
+
+v_add3_u32_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x55,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05]
+
+v_add3_u32_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x55,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05]
+
+v_add3_u32_e64_dpp v5, v1, v2, null dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x55,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05]
+
+v_add3_u32_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x55,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05]
+
+v_add3_u32_e64_dpp v5, v1, v2, 0.5 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: [0x05,0x00,0x55,0xd6,0xea,0x04,0xc2,0x03,0x01,0x77,0x39,0x05]
+
+v_add3_u32_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: [0xff,0x00,0x55,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00]
+
+v_add_co_u32_e64_dpp v5, s6, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: [0x05,0x06,0x00,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_add_co_u32_e64_dpp v5, s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: [0x05,0x69,0x00,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_add_co_u32_e64_dpp v5, s105, v1, s2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: [0x05,0x69,0x00,0xd7,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_add_co_u32_e64_dpp v5, vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: [0x05,0x6a,0x00,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_add_co_u32_e64_dpp v5, vcc_hi, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: [0x05,0x6b,0x00,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_add_co_u32_e64_dpp v5, ttmp15, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: [0x05,0x7b,0x00,0xd7,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_add_co_u32_e64_dpp v5, s[12:13], v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: [0x05,0x0c,0x00,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_add_co_u32_e64_dpp v5, s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: [0x05,0x68,0x00,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_add_co_u32_e64_dpp v5, s[104:105], v1, s2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: [0x05,0x68,0x00,0xd7,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_add_co_u32_e64_dpp v5, vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: [0x05,0x6a,0x00,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_add_co_u32_e64_dpp v5, ttmp[14:15], v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: [0x05,0x7a,0x00,0xd7,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_add_co_u32_e64_dpp v255, null, v255, v255 clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: [0xff,0xfc,0x00,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+
+v_add_lshl_u32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x47,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05]
+
+v_add_lshl_u32_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x47,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05]
+
+v_add_lshl_u32_e64_dpp v5, v1, 10, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x47,0xd6,0xe9,0x14,0x0d,0x04,0x01,0x77,0x39,0x05]
+
+v_add_lshl_u32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x47,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05]
+
+v_add_lshl_u32_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x47,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05]
+
+v_add_lshl_u32_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x47,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05]
+
+v_add_lshl_u32_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x47,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05]
+
+v_add_lshl_u32_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x47,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05]
+
+v_add_lshl_u32_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x47,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05]
+
+v_add_lshl_u32_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x47,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05]
+
+v_add_lshl_u32_e64_dpp v5, v1, v2, null dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x47,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05]
+
+v_add_lshl_u32_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x47,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05]
+
+v_add_lshl_u32_e64_dpp v5, v1, v2, 0.5 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: [0x05,0x00,0x47,0xd6,0xea,0x04,0xc2,0x03,0x01,0x77,0x39,0x05]
+
+v_add_lshl_u32_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: [0xff,0x00,0x47,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00]
+
+v_add_nc_i16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_add_nc_i16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: [0x05,0x00,0x0d,0xd7,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_add_nc_i16_e64_dpp v255, v255, v255 clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: [0xff,0x80,0x0d,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+
+v_add_nc_i32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x26,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_add_nc_i32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: [0x05,0x00,0x26,0xd7,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_add_nc_i32_e64_dpp v255, v255, v255 clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: [0xff,0x80,0x26,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+
+v_add_nc_u16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_add_nc_u16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: [0x05,0x00,0x03,0xd7,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_add_nc_u16_e64_dpp v255, v255, v255 clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: [0xff,0x80,0x03,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+
+v_alignbit_b32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x16,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05]
+
+v_alignbit_b32_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x16,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05]
+
+v_alignbit_b32_e64_dpp v5, v1, 10, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x16,0xd6,0xe9,0x14,0x0d,0x04,0x01,0x77,0x39,0x05]
+
+v_alignbit_b32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x16,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05]
+
+v_alignbit_b32_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x16,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05]
+
+v_alignbit_b32_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x16,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05]
+
+v_alignbit_b32_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x16,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05]
+
+v_alignbit_b32_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x16,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05]
+
+v_alignbit_b32_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x16,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05]
+
+v_alignbit_b32_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x16,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05]
+
+v_alignbit_b32_e64_dpp v5, v1, v2, null dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x16,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05]
+
+v_alignbit_b32_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: [0x05,0x00,0x16,0xd6,0xea,0x04,0x06,0x03,0x01,0x77,0x39,0x05]
+
+v_alignbit_b32_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: [0xff,0x00,0x16,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00]
+
+v_alignbyte_b32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x17,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05]
+
+v_alignbyte_b32_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x17,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05]
+
+v_alignbyte_b32_e64_dpp v5, v1, 10, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x17,0xd6,0xe9,0x14,0x0d,0x04,0x01,0x77,0x39,0x05]
+
+v_alignbyte_b32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x17,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05]
+
+v_alignbyte_b32_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x17,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05]
+
+v_alignbyte_b32_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x17,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05]
+
+v_alignbyte_b32_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x17,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05]
+
+v_alignbyte_b32_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x17,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05]
+
+v_alignbyte_b32_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x17,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05]
+
+v_alignbyte_b32_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x17,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05]
+
+v_alignbyte_b32_e64_dpp v5, v1, v2, null dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x17,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05]
+
+v_alignbyte_b32_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: [0x05,0x00,0x17,0xd6,0xea,0x04,0x06,0x03,0x01,0x77,0x39,0x05]
+
+v_alignbyte_b32_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: [0xff,0x00,0x17,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00]
+
+v_and_b16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x62,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_and_b16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: [0x05,0x00,0x62,0xd7,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_and_b16_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: [0xff,0x00,0x62,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+
+v_and_or_b32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x57,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05]
+
+v_and_or_b32_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x57,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05]
+
+v_and_or_b32_e64_dpp v5, v1, 10, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x57,0xd6,0xe9,0x14,0x0d,0x04,0x01,0x77,0x39,0x05]
+
+v_and_or_b32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x57,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05]
+
+v_and_or_b32_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x57,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05]
+
+v_and_or_b32_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x57,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05]
+
+v_and_or_b32_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x57,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05]
+
+v_and_or_b32_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x57,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05]
+
+v_and_or_b32_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x57,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05]
+
+v_and_or_b32_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x57,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05]
+
+v_and_or_b32_e64_dpp v5, v1, v2, null dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x57,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05]
+
+v_and_or_b32_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x57,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05]
+
+v_and_or_b32_e64_dpp v5, v1, v2, 0.5 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: [0x05,0x00,0x57,0xd6,0xea,0x04,0xc2,0x03,0x01,0x77,0x39,0x05]
+
+v_and_or_b32_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: [0xff,0x00,0x57,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00]
+
+v_ashrrev_i16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x3a,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_ashrrev_i16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: [0x05,0x00,0x3a,0xd7,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_ashrrev_i16_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: [0xff,0x00,0x3a,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+
+v_bcnt_u32_b32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x1e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_bcnt_u32_b32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: [0x05,0x00,0x1e,0xd7,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_bcnt_u32_b32_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: [0xff,0x00,0x1e,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+
+v_bfe_i32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x11,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05]
+
+v_bfe_i32_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x11,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05]
+
+v_bfe_i32_e64_dpp v5, v1, 10, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x11,0xd6,0xe9,0x14,0x0d,0x04,0x01,0x77,0x39,0x05]
+
+v_bfe_i32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x11,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05]
+
+v_bfe_i32_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x11,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05]
+
+v_bfe_i32_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x11,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05]
+
+v_bfe_i32_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x11,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05]
+
+v_bfe_i32_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x11,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05]
+
+v_bfe_i32_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x11,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05]
+
+v_bfe_i32_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x11,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05]
+
+v_bfe_i32_e64_dpp v5, v1, v2, null dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x11,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05]
+
+v_bfe_i32_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x11,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05]
+
+v_bfe_i32_e64_dpp v5, v1, v2, 0.5 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: [0x05,0x00,0x11,0xd6,0xea,0x04,0xc2,0x03,0x01,0x77,0x39,0x05]
+
+v_bfe_i32_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: [0xff,0x00,0x11,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00]
+
+v_bfe_u32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x10,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05]
+
+v_bfe_u32_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x10,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05]
+
+v_bfe_u32_e64_dpp v5, v1, 10, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x10,0xd6,0xe9,0x14,0x0d,0x04,0x01,0x77,0x39,0x05]
+
+v_bfe_u32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x10,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05]
+
+v_bfe_u32_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x10,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05]
+
+v_bfe_u32_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x10,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05]
+
+v_bfe_u32_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x10,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05]
+
+v_bfe_u32_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x10,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05]
+
+v_bfe_u32_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x10,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05]
+
+v_bfe_u32_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x10,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05]
+
+v_bfe_u32_e64_dpp v5, v1, v2, null dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x10,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05]
+
+v_bfe_u32_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x10,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05]
+
+v_bfe_u32_e64_dpp v5, v1, v2, 0.5 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: [0x05,0x00,0x10,0xd6,0xea,0x04,0xc2,0x03,0x01,0x77,0x39,0x05]
+
+v_bfe_u32_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0
fi:0 +// GFX12: [0xff,0x00,0x10,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] + +v_bfi_b32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x12,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +v_bfi_b32_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x12,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + +v_bfi_b32_e64_dpp v5, v1, 10, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x12,0xd6,0xe9,0x14,0x0d,0x04,0x01,0x77,0x39,0x05] + +v_bfi_b32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x12,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +v_bfi_b32_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x12,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] + +v_bfi_b32_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x12,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] + +v_bfi_b32_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x12,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] + +v_bfi_b32_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x12,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] + +v_bfi_b32_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x12,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] + +v_bfi_b32_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x12,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] + +v_bfi_b32_e64_dpp v5, v1, v2, null dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x12,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] + +v_bfi_b32_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x12,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] + +v_bfi_b32_e64_dpp v5, v1, v2, 0.5 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x05,0x00,0x12,0xd6,0xea,0x04,0xc2,0x03,0x01,0x77,0x39,0x05] + +v_bfi_b32_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: [0xff,0x00,0x12,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] + +v_bfm_b32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x1d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_bfm_b32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x05,0x00,0x1d,0xd7,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_bfm_b32_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: [0xff,0x00,0x1d,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] + +v_cndmask_b16_e64_dpp v5, v1, v2, s3 dpp8:[7,6,5,4,3,2,1,0] +// W32: [0x05,0x00,0x5d,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b16_e64_dpp v5, v1, s2, s3 dpp8:[7,6,5,4,3,2,1,0] +// W32: [0x05,0x00,0x5d,0xd6,0xe9,0x04,0x0c,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b16_e64_dpp v5, v1, 10, s3 dpp8:[7,6,5,4,3,2,1,0] +// W32: [0x05,0x00,0x5d,0xd6,0xe9,0x14,0x0d,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] +// W32: [0x05,0x00,0x5d,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] +// W32: [0x05,0x00,0x5d,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b16_e64_dpp v5, |v1|, -v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// W32: 
[0x05,0x01,0x5d,0xd6,0xe9,0x04,0xaa,0x41,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b16_e64_dpp v5, -v1, |v2|, ttmp15 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W32: [0x05,0x02,0x5d,0xd6,0xea,0x04,0xee,0x21,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] dpp8:[7,6,5,4,3,2,1,0] +// W64: [0x05,0x00,0x5d,0xd6,0xe9,0x04,0x1a,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b16_e64_dpp v5, v1, v2, s[104:105] dpp8:[7,6,5,4,3,2,1,0] +// W64: [0x05,0x00,0x5d,0xd6,0xe9,0x04,0xa2,0x01,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b16_e64_dpp v5, |v1|, -v2, vcc dpp8:[7,6,5,4,3,2,1,0] +// W64: [0x05,0x01,0x5d,0xd6,0xe9,0x04,0xaa,0x41,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b16_e64_dpp v5, -v1, |v2|, ttmp[14:15] dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W64: [0x05,0x02,0x5d,0xd6,0xea,0x04,0xea,0x21,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b16_e64_dpp v5, -v1, |s2|, ttmp[14:15] dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W64: [0x05,0x02,0x5d,0xd6,0xea,0x04,0xe8,0x21,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b16_e64_dpp v255, -|v255|, -|v255|, null dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: [0xff,0x03,0x5d,0xd6,0xe9,0xfe,0xf3,0x61,0xff,0x00,0x00,0x00] + +v_cubeid_f32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x0c,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +v_cubeid_f32_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x0c,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + +v_cubeid_f32_e64_dpp v5, v1, 2.0, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x0c,0xd6,0xe9,0xe8,0x0d,0x04,0x01,0x77,0x39,0x05] + +v_cubeid_f32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x0c,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +v_cubeid_f32_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x0c,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] + +v_cubeid_f32_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x0c,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] + +v_cubeid_f32_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x0c,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] + +v_cubeid_f32_e64_dpp v5, |v1|, v2, -ttmp15 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x01,0x0c,0xd6,0xe9,0x04,0xee,0x81,0x01,0x77,0x39,0x05] + +v_cubeid_f32_e64_dpp v5, v1, -|v2|, exec_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x02,0x0c,0xd6,0xe9,0x04,0xfe,0x41,0x01,0x77,0x39,0x05] + +v_cubeid_f32_e64_dpp v5, -v1, v2, |exec_lo| dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x04,0x0c,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05] + +v_cubeid_f32_e64_dpp v5, -|v1|, -|v2|, null dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x03,0x0c,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05] + +v_cubeid_f32_e64_dpp v5, -|v1|, v2, -|-1| mul:2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x05,0x0c,0xd6,0xe9,0x04,0x06,0xab,0x01,0x77,0x39,0x05] + +v_cubeid_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x05,0x06,0x0c,0xd6,0xea,0x04,0xc2,0xd3,0x01,0x77,0x39,0x05] + +v_cubeid_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// 
GFX12: [0xff,0x87,0x0c,0xd6,0xe9,0xfe,0xf7,0xfb,0xff,0x00,0x00,0x00] + +v_cubema_f32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x0f,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +v_cubema_f32_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x0f,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + +v_cubema_f32_e64_dpp v5, v1, 2.0, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x0f,0xd6,0xe9,0xe8,0x0d,0x04,0x01,0x77,0x39,0x05] + +v_cubema_f32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x0f,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +v_cubema_f32_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x0f,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] + +v_cubema_f32_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x0f,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] + +v_cubema_f32_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x0f,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] + +v_cubema_f32_e64_dpp v5, |v1|, v2, -ttmp15 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x01,0x0f,0xd6,0xe9,0x04,0xee,0x81,0x01,0x77,0x39,0x05] + +v_cubema_f32_e64_dpp v5, v1, -|v2|, exec_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x02,0x0f,0xd6,0xe9,0x04,0xfe,0x41,0x01,0x77,0x39,0x05] + +v_cubema_f32_e64_dpp v5, -v1, v2, |exec_lo| dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x04,0x0f,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05] + +v_cubema_f32_e64_dpp v5, -|v1|, -|v2|, null dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x03,0x0f,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05] + +v_cubema_f32_e64_dpp v5, -|v1|, v2, -|-1| mul:2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x05,0x0f,0xd6,0xe9,0x04,0x06,0xab,0x01,0x77,0x39,0x05] + +v_cubema_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x05,0x06,0x0f,0xd6,0xea,0x04,0xc2,0xd3,0x01,0x77,0x39,0x05] + +v_cubema_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: [0xff,0x87,0x0f,0xd6,0xe9,0xfe,0xf7,0xfb,0xff,0x00,0x00,0x00] + +v_cubesc_f32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x0d,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +v_cubesc_f32_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x0d,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + +v_cubesc_f32_e64_dpp v5, v1, 2.0, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x0d,0xd6,0xe9,0xe8,0x0d,0x04,0x01,0x77,0x39,0x05] + +v_cubesc_f32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x0d,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +v_cubesc_f32_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x0d,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] + +v_cubesc_f32_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x0d,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] + +v_cubesc_f32_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x0d,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] + +v_cubesc_f32_e64_dpp v5, |v1|, v2, -ttmp15 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x01,0x0d,0xd6,0xe9,0x04,0xee,0x81,0x01,0x77,0x39,0x05] + +v_cubesc_f32_e64_dpp v5, v1, -|v2|, exec_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x02,0x0d,0xd6,0xe9,0x04,0xfe,0x41,0x01,0x77,0x39,0x05] + +v_cubesc_f32_e64_dpp v5, -v1, v2, |exec_lo| dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x04,0x0d,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05] + +v_cubesc_f32_e64_dpp v5, -|v1|, -|v2|, null dpp8:[7,6,5,4,3,2,1,0] +// GFX12: 
[0x05,0x03,0x0d,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05] + +v_cubesc_f32_e64_dpp v5, -|v1|, v2, -|-1| mul:2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x05,0x0d,0xd6,0xe9,0x04,0x06,0xab,0x01,0x77,0x39,0x05] + +v_cubesc_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x05,0x06,0x0d,0xd6,0xea,0x04,0xc2,0xd3,0x01,0x77,0x39,0x05] + +v_cubesc_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: [0xff,0x87,0x0d,0xd6,0xe9,0xfe,0xf7,0xfb,0xff,0x00,0x00,0x00] + +v_cubetc_f32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x0e,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +v_cubetc_f32_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x0e,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + +v_cubetc_f32_e64_dpp v5, v1, 2.0, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x0e,0xd6,0xe9,0xe8,0x0d,0x04,0x01,0x77,0x39,0x05] + +v_cubetc_f32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x0e,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +v_cubetc_f32_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x0e,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] + +v_cubetc_f32_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x0e,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] + +v_cubetc_f32_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x0e,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] + +v_cubetc_f32_e64_dpp v5, |v1|, v2, -ttmp15 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x01,0x0e,0xd6,0xe9,0x04,0xee,0x81,0x01,0x77,0x39,0x05] + +v_cubetc_f32_e64_dpp v5, v1, -|v2|, exec_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x02,0x0e,0xd6,0xe9,0x04,0xfe,0x41,0x01,0x77,0x39,0x05] + +v_cubetc_f32_e64_dpp v5, -v1, v2, |exec_lo| dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x04,0x0e,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05] + +v_cubetc_f32_e64_dpp v5, -|v1|, -|v2|, null dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x03,0x0e,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05] + +v_cubetc_f32_e64_dpp v5, -|v1|, v2, -|-1| mul:2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x05,0x0e,0xd6,0xe9,0x04,0x06,0xab,0x01,0x77,0x39,0x05] + +v_cubetc_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x05,0x06,0x0e,0xd6,0xea,0x04,0xc2,0xd3,0x01,0x77,0x39,0x05] + +v_cubetc_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: [0xff,0x87,0x0e,0xd6,0xe9,0xfe,0xf7,0xfb,0xff,0x00,0x00,0x00] + +v_cvt_pk_fp8_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,2,3,0,1] +// GFX12: encoding: [0x05,0x00,0x69,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0xa9,0x21] + +v_cvt_pk_fp8_f32_e64_dpp v5, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: encoding: [0x05,0x01,0x69,0xd7,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] + +v_cvt_pk_fp8_f32_e64_dpp v5, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: encoding: [0x05,0x02,0x69,0xd7,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05] + +v_cvt_pk_fp8_f32_e64_dpp v255, -|v255|, -|v255| dpp8:[0,0,0,0,0,0,0,0] +// GFX12: encoding: [0xff,0x03,0x69,0xd7,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] + +v_cvt_pk_bf8_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: encoding: [0x05,0x00,0x6a,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_cvt_pk_bf8_f32_e64_dpp v5, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: encoding: [0x05,0x01,0x6a,0xd7,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] + +v_cvt_pk_bf8_f32_e64_dpp v5, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: encoding: 
[0x05,0x02,0x6a,0xd7,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05] + +v_cvt_pk_bf8_f32_e64_dpp v255, -|v255|, -|v255| dpp8:[0,0,0,0,0,0,0,0] +// GFX12: encoding: [0xff,0x03,0x6a,0xd7,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] + +v_cvt_sr_fp8_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: encoding: [0x05,0x00,0x6b,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_cvt_sr_fp8_f32_e64_dpp v5, |v1|, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: encoding: [0x05,0x01,0x6b,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_cvt_sr_fp8_f32_e64_dpp v5, -v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: encoding: [0x05,0x00,0x6b,0xd7,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05] + +v_cvt_sr_fp8_f32_e64_dpp v255, -|v255|, v255 dpp8:[0,0,0,0,0,0,0,0] +// GFX12: encoding: [0xff,0x01,0x6b,0xd7,0xe9,0xfe,0x03,0x20,0xff,0x00,0x00,0x00] + +v_cvt_sr_fp8_f32 v1, v2, v3 byte_sel:0 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_cvt_sr_fp8_f32_e64_dpp v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x01,0x00,0x6b,0xd7,0xe9,0x06,0x02,0x00,0x02,0x77,0x39,0x05] + +v_cvt_sr_fp8_f32 v1, v2, v3 byte_sel:1 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_cvt_sr_fp8_f32_e64_dpp v1, v2, v3 byte_sel:1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x01,0x20,0x6b,0xd7,0xe9,0x06,0x02,0x00,0x02,0x77,0x39,0x05] + +v_cvt_sr_fp8_f32 v1, v2, v3 byte_sel:2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_cvt_sr_fp8_f32_e64_dpp v1, v2, v3 byte_sel:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x01,0x40,0x6b,0xd7,0xe9,0x06,0x02,0x00,0x02,0x77,0x39,0x05] + +v_cvt_sr_fp8_f32 v1, v2, v3 byte_sel:3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_cvt_sr_fp8_f32_e64_dpp v1, v2, v3 byte_sel:3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x01,0x60,0x6b,0xd7,0xe9,0x06,0x02,0x00,0x02,0x77,0x39,0x05] + +v_cvt_sr_bf8_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: encoding: [0x05,0x00,0x6c,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_cvt_sr_bf8_f32_e64_dpp v5, |v1|, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: encoding: [0x05,0x01,0x6c,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_cvt_sr_bf8_f32_e64_dpp v5, -v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: encoding: [0x05,0x00,0x6c,0xd7,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05] + +v_cvt_sr_bf8_f32_e64_dpp v255, -|v255|, v255 dpp8:[0,0,0,0,0,0,0,0] +// GFX12: encoding: [0xff,0x01,0x6c,0xd7,0xe9,0xfe,0x03,0x20,0xff,0x00,0x00,0x00] + +v_cvt_sr_bf8_f32 v1, v2, v3 byte_sel:0 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_cvt_sr_bf8_f32_e64_dpp v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x01,0x00,0x6c,0xd7,0xe9,0x06,0x02,0x00,0x02,0x77,0x39,0x05] + +v_cvt_sr_bf8_f32 v1, v2, v3 byte_sel:1 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_cvt_sr_bf8_f32_e64_dpp v1, v2, v3 byte_sel:1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x01,0x20,0x6c,0xd7,0xe9,0x06,0x02,0x00,0x02,0x77,0x39,0x05] + +v_cvt_sr_bf8_f32 v1, v2, v3 byte_sel:2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_cvt_sr_bf8_f32_e64_dpp v1, v2, v3 byte_sel:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x01,0x40,0x6c,0xd7,0xe9,0x06,0x02,0x00,0x02,0x77,0x39,0x05] + +v_cvt_sr_bf8_f32 v1, v2, v3 byte_sel:3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_cvt_sr_bf8_f32_e64_dpp v1, v2, v3 byte_sel:3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x01,0x60,0x6c,0xd7,0xe9,0x06,0x02,0x00,0x02,0x77,0x39,0x05] + +v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x06,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_cvt_pk_i16_f32_e64_dpp v5, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x01,0x06,0xd7,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] + +v_cvt_pk_i16_f32_e64_dpp v5, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: 
+
+v_cvt_pk_i16_f32_e64_dpp v255, -|v255|, -|v255| dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: [0xff,0x03,0x06,0xd7,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+
+v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x24,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: [0x05,0x00,0x24,0xd7,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_cvt_pk_i16_i32_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: [0xff,0x00,0x24,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+
+v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x12,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_cvt_pk_norm_i16_f16_e64_dpp v5, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x01,0x12,0xd7,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+
+v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: [0x05,0x02,0x12,0xd7,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+
+v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255|, -|v255| dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: [0xff,0x03,0x12,0xd7,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+
+v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x13,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_cvt_pk_norm_u16_f16_e64_dpp v5, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x01,0x13,0xd7,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+
+v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: [0x05,0x02,0x13,0xd7,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+
+v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255|, -|v255| dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: [0xff,0x03,0x13,0xd7,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+
+v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x07,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_cvt_pk_u16_f32_e64_dpp v5, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x01,0x07,0xd7,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+
+v_cvt_pk_u16_f32_e64_dpp v5, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: [0x05,0x02,0x07,0xd7,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+
+v_cvt_pk_u16_f32_e64_dpp v255, -|v255|, -|v255| dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: [0xff,0x03,0x07,0xd7,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+
+v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x23,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: [0x05,0x00,0x23,0xd7,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_cvt_pk_u16_u32_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: [0xff,0x00,0x23,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+
+v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x26,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05]
+
+v_cvt_pk_u8_f32_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x26,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05]
+
+v_cvt_pk_u8_f32_e64_dpp v5, v1, 2.0, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x26,0xd6,0xe9,0xe8,0x0d,0x04,0x01,0x77,0x39,0x05]
+
+v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x26,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05]
+
+v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x26,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05]
+
+v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x26,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05]
+
+v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x26,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05]
+
+v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x26,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05]
+
+v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x26,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05]
+
+v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x26,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05]
+
+v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, null dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x26,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05]
+
+v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x26,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05]
+
+v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, 0.5 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: [0x05,0x00,0x26,0xd6,0xea,0x04,0xc2,0x03,0x01,0x77,0x39,0x05]
+
+v_cvt_pk_u8_f32_e64_dpp v255, -|v255|, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: [0xff,0x01,0x26,0xd6,0xe9,0xfe,0xf7,0x23,0xff,0x00,0x00,0x00]
+
+v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x12,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_cvt_pk_norm_i16_f16_e64_dpp v5, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x01,0x12,0xd7,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+
+v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: [0x05,0x02,0x12,0xd7,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+
+v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255|, -|v255| dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: [0xff,0x03,0x12,0xd7,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+
+v_cvt_pk_norm_i16_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x21,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_cvt_pk_norm_i16_f32_e64_dpp v5, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x01,0x21,0xd7,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+
+v_cvt_pk_norm_i16_f32_e64_dpp v5, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: [0x05,0x02,0x21,0xd7,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+
+v_cvt_pk_norm_i16_f32_e64_dpp v255, -|v255|, -|v255| dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: [0xff,0x03,0x21,0xd7,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+
+v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x13,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_cvt_pk_norm_u16_f16_e64_dpp v5, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x01,0x13,0xd7,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+
+v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: [0x05,0x02,0x13,0xd7,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+
+v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255|, -|v255| dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: [0xff,0x03,0x13,0xd7,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+
+v_cvt_pk_norm_u16_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x22,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_cvt_pk_norm_u16_f32_e64_dpp v5, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x01,0x22,0xd7,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+
+v_cvt_pk_norm_u16_f32_e64_dpp v5, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: [0x05,0x02,0x22,0xd7,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+
+v_cvt_pk_norm_u16_f32_e64_dpp v255, -|v255|, -|v255| dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: [0xff,0x03,0x22,0xd7,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+
+v_div_fixup_f16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x54,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05]
+
+v_div_fixup_f16_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x54,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05]
+
+v_div_fixup_f16_e64_dpp v5, v1, 2.0, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x54,0xd6,0xe9,0xe8,0x0d,0x04,0x01,0x77,0x39,0x05]
+
+v_div_fixup_f16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x54,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05]
+
+v_div_fixup_f16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x54,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05]
+
+v_div_fixup_f16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x54,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05]
+
+v_div_fixup_f16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x54,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05]
+
+v_div_fixup_f16_e64_dpp v5, |v1|, v2, -ttmp15 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x01,0x54,0xd6,0xe9,0x04,0xee,0x81,0x01,0x77,0x39,0x05]
+
+v_div_fixup_f16_e64_dpp v5, v1, -|v2|, exec_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x02,0x54,0xd6,0xe9,0x04,0xfe,0x41,0x01,0x77,0x39,0x05]
+
+v_div_fixup_f16_e64_dpp v5, -v1, v2, |exec_lo| dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x04,0x54,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05]
+
+v_div_fixup_f16_e64_dpp v5, -|v1|, -|v2|, null dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x03,0x54,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05]
+
+v_div_fixup_f16_e64_dpp v5, -|v1|, v2, -|-1| dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x05,0x54,0xd6,0xe9,0x04,0x06,0xa3,0x01,0x77,0x39,0x05]
+
+v_div_fixup_f16_e64_dpp v5, v1, -|v2|, -|0.5| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: [0x05,0x06,0x54,0xd6,0xea,0x04,0xc2,0xc3,0x01,0x77,0x39,0x05]
+
+v_div_fixup_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: [0xff,0x87,0x54,0xd6,0xe9,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00]
+
+v_fma_f16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x48,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05]
+
+v_fma_f16_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x48,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05]
+
+v_fma_f16_e64_dpp v5, v1, 2.0, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x48,0xd6,0xe9,0xe8,0x0d,0x04,0x01,0x77,0x39,0x05]
+
+v_fma_f16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x48,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05]
+
+v_fma_f16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x48,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05]
+
+v_fma_f16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x48,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05]
+
+v_fma_f16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x48,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05]
+
+v_fma_f16_e64_dpp v5, |v1|, v2, -ttmp15 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x01,0x48,0xd6,0xe9,0x04,0xee,0x81,0x01,0x77,0x39,0x05]
+
+v_fma_f16_e64_dpp v5, v1, -|v2|, exec_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x02,0x48,0xd6,0xe9,0x04,0xfe,0x41,0x01,0x77,0x39,0x05]
+
+v_fma_f16_e64_dpp v5, -v1, v2, |exec_lo| dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x04,0x48,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05]
+
+v_fma_f16_e64_dpp v5, -|v1|, -|v2|, null dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x03,0x48,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05]
+
+v_fma_f16_e64_dpp v5, -|v1|, v2, -|-1| dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x05,0x48,0xd6,0xe9,0x04,0x06,0xa3,0x01,0x77,0x39,0x05]
+
+v_fma_f16_e64_dpp v5, v1, -|v2|, -|0.5| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: [0x05,0x06,0x48,0xd6,0xea,0x04,0xc2,0xc3,0x01,0x77,0x39,0x05]
+
+v_fma_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: [0xff,0x87,0x48,0xd6,0xe9,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00]
+
+v_fma_f32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x13,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05]
+
+v_fma_f32_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x13,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05]
+
+v_fma_f32_e64_dpp v5, v1, 2.0, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x13,0xd6,0xe9,0xe8,0x0d,0x04,0x01,0x77,0x39,0x05]
+
+v_fma_f32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x13,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05]
+
+v_fma_f32_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x13,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05]
+
+v_fma_f32_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x13,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05]
+
+v_fma_f32_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x13,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05]
+
+v_fma_f32_e64_dpp v5, |v1|, v2, -ttmp15 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x01,0x13,0xd6,0xe9,0x04,0xee,0x81,0x01,0x77,0x39,0x05]
+
+v_fma_f32_e64_dpp v5, v1, -|v2|, exec_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x02,0x13,0xd6,0xe9,0x04,0xfe,0x41,0x01,0x77,0x39,0x05]
+
+v_fma_f32_e64_dpp v5, -v1, v2, |exec_lo| dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x04,0x13,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05]
+
+v_fma_f32_e64_dpp v5, -|v1|, -|v2|, null dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x03,0x13,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05]
+
+v_fma_f32_e64_dpp v5, -|v1|, v2, -|-1| mul:2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x05,0x13,0xd6,0xe9,0x04,0x06,0xab,0x01,0x77,0x39,0x05]
+
+v_fma_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: [0x05,0x06,0x13,0xd6,0xea,0x04,0xc2,0xd3,0x01,0x77,0x39,0x05]
+
+v_fma_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: [0xff,0x87,0x13,0xd6,0xe9,0xfe,0xf7,0xfb,0xff,0x00,0x00,0x00]
+
+v_ldexp_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x1c,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_ldexp_f32_e64_dpp v5, v1, v2 mul:2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x1c,0xd7,0xe9,0x04,0x02,0x08,0x01,0x77,0x39,0x05]
+
+v_ldexp_f32_e64_dpp v5, v1, v2 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: [0x05,0x00,0x1c,0xd7,0xea,0x04,0x02,0x10,0x01,0x77,0x39,0x05]
+
+v_ldexp_f32_e64_dpp v255, -|v255|, v255 clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: [0xff,0x81,0x1c,0xd7,0xe9,0xfe,0x03,0x38,0xff,0x00,0x00,0x00]
+
+v_lerp_u8_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x15,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05]
+
+v_lerp_u8_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x15,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05]
+
+v_lerp_u8_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x15,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05]
+
+v_lerp_u8_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x15,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05]
+
+v_lerp_u8_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x15,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05]
+
+v_lerp_u8_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x15,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05]
+
+v_lerp_u8_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x15,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05]
+
+v_lerp_u8_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x15,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05]
+
+v_lerp_u8_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x15,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05]
+
+v_lerp_u8_e64_dpp v5, v1, v2, null dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x15,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05]
+
+v_lerp_u8_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x15,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05]
+
+v_lerp_u8_e64_dpp v5, v1, v2, 0.5 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: [0x05,0x00,0x15,0xd6,0xea,0x04,0xc2,0x03,0x01,0x77,0x39,0x05]
+
+v_lerp_u8_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: [0xff,0x00,0x15,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00]
+
+v_lshl_add_u32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x46,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05]
+
+v_lshl_add_u32_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x46,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05]
+
+v_lshl_add_u32_e64_dpp v5, v1, 10, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x46,0xd6,0xe9,0x14,0x0d,0x04,0x01,0x77,0x39,0x05]
+
+v_lshl_add_u32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x46,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05]
+
+v_lshl_add_u32_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x46,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05]
+
+v_lshl_add_u32_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x46,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05]
+
+v_lshl_add_u32_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x46,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05]
+
+v_lshl_add_u32_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x46,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05]
+
+v_lshl_add_u32_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x46,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05]
+
+v_lshl_add_u32_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x46,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05]
+
+v_lshl_add_u32_e64_dpp v5, v1, v2, null dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x46,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05]
+
+v_lshl_add_u32_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x46,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05]
+
+v_lshl_add_u32_e64_dpp v5, v1, v2, 0.5 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: [0x05,0x00,0x46,0xd6,0xea,0x04,0xc2,0x03,0x01,0x77,0x39,0x05]
+
+v_lshl_add_u32_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: [0xff,0x00,0x46,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00]
+
+v_lshl_or_b32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x56,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05]
+
+v_lshl_or_b32_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x56,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05]
+
+v_lshl_or_b32_e64_dpp v5, v1, 10, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x56,0xd6,0xe9,0x14,0x0d,0x04,0x01,0x77,0x39,0x05]
+
+v_lshl_or_b32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x56,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05]
+
+v_lshl_or_b32_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x56,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05]
+
+v_lshl_or_b32_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x56,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05]
+
+v_lshl_or_b32_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x56,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05]
+
+v_lshl_or_b32_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x56,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05]
+
+v_lshl_or_b32_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x56,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05]
+
+v_lshl_or_b32_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x56,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05]
+
+v_lshl_or_b32_e64_dpp v5, v1, v2, null dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x56,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05]
+
+v_lshl_or_b32_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x56,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05]
+
+v_lshl_or_b32_e64_dpp v5, v1, v2, 0.5 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: [0x05,0x00,0x56,0xd6,0xea,0x04,0xc2,0x03,0x01,0x77,0x39,0x05]
+
+v_lshl_or_b32_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: [0xff,0x00,0x56,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00]
+
+v_lshlrev_b16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x38,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_lshlrev_b16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: [0x05,0x00,0x38,0xd7,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_lshlrev_b16_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: [0xff,0x00,0x38,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+
+v_lshrrev_b16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x39,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_lshrrev_b16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: [0x05,0x00,0x39,0xd7,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_lshrrev_b16_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: [0xff,0x00,0x39,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+
+v_mad_i16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x53,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05]
+
+v_mad_i16_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x53,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05]
+
+v_mad_i16_e64_dpp v5, v1, 10, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x53,0xd6,0xe9,0x14,0x0d,0x04,0x01,0x77,0x39,0x05]
+
+v_mad_i16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x53,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05]
+
+v_mad_i16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x53,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05]
+
+v_mad_i16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x53,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05]
+
+v_mad_i16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x53,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05]
+
+v_mad_i16_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x53,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05]
+
+v_mad_i16_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x53,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05]
+
+v_mad_i16_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x53,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05]
+
+v_mad_i16_e64_dpp v5, v1, v2, null dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x53,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05]
+
+v_mad_i16_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: [0x05,0x00,0x53,0xd6,0xea,0x04,0x06,0x03,0x01,0x77,0x39,0x05]
+
+v_mad_i16_e64_dpp v255, v255, v255, src_scc clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: [0xff,0x80,0x53,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00]
+
+v_mad_i32_i16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x5a,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05]
+
+v_mad_i32_i16_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x5a,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05]
+
+v_mad_i32_i16_e64_dpp v5, v1, 10, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x5a,0xd6,0xe9,0x14,0x0d,0x04,0x01,0x77,0x39,0x05]
+
+v_mad_i32_i16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x5a,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05]
+
+v_mad_i32_i16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x5a,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05]
+
+v_mad_i32_i16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x5a,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05]
+
+v_mad_i32_i16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x5a,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05]
+
+v_mad_i32_i16_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x5a,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05]
+
+v_mad_i32_i16_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x5a,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05]
+
+v_mad_i32_i16_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x5a,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05]
+
+v_mad_i32_i16_e64_dpp v5, v1, v2, null dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x5a,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05]
+
+v_mad_i32_i16_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x5a,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05]
+
+v_mad_i32_i16_e64_dpp v5, v1, v2, 0.5 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: [0x05,0x00,0x5a,0xd6,0xea,0x04,0xc2,0x03,0x01,0x77,0x39,0x05]
+
+v_mad_i32_i16_e64_dpp v255, v255, v255, src_scc clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: [0xff,0x80,0x5a,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00]
+
+v_mad_i32_i24_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x0a,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05]
+
+v_mad_i32_i24_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x0a,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05]
+
+v_mad_i32_i24_e64_dpp v5, v1, 10, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x0a,0xd6,0xe9,0x14,0x0d,0x04,0x01,0x77,0x39,0x05]
+
+v_mad_i32_i24_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x0a,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05]
+
+v_mad_i32_i24_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x0a,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05]
+
+v_mad_i32_i24_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x0a,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05]
+
+v_mad_i32_i24_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x0a,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05]
+
+v_mad_i32_i24_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x0a,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05]
+
+v_mad_i32_i24_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x0a,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05]
+
+v_mad_i32_i24_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x0a,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05]
+
+v_mad_i32_i24_e64_dpp v5, v1, v2, null dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x0a,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05]
+
+v_mad_i32_i24_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x0a,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05]
+
+v_mad_i32_i24_e64_dpp v5, v1, v2, 0.5 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: [0x05,0x00,0x0a,0xd6,0xea,0x04,0xc2,0x03,0x01,0x77,0x39,0x05]
+
+v_mad_i32_i24_e64_dpp v255, v255, v255, src_scc clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: [0xff,0x80,0x0a,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00]
+
+v_mad_u16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x41,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05]
+
+v_mad_u16_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x41,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05]
+
+v_mad_u16_e64_dpp v5, v1, 10, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x41,0xd6,0xe9,0x14,0x0d,0x04,0x01,0x77,0x39,0x05]
+
+v_mad_u16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x41,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05]
+
+v_mad_u16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x41,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05]
+
+v_mad_u16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x41,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05]
+
+v_mad_u16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x41,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05]
+
+v_mad_u16_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x41,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05]
+
+v_mad_u16_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x41,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05]
+
+v_mad_u16_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x41,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05]
+
+v_mad_u16_e64_dpp v5, v1, v2, null dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x41,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05]
+
+v_mad_u16_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: [0x05,0x00,0x41,0xd6,0xea,0x04,0x06,0x03,0x01,0x77,0x39,0x05]
+
+v_mad_u16_e64_dpp v255, v255, v255, src_scc clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: [0xff,0x80,0x41,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00]
+
+v_mad_u32_u16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x59,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05]
+
+v_mad_u32_u16_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x59,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05]
+
+v_mad_u32_u16_e64_dpp v5, v1, 10, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x59,0xd6,0xe9,0x14,0x0d,0x04,0x01,0x77,0x39,0x05]
+
+v_mad_u32_u16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x59,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05]
+
+v_mad_u32_u16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x59,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05]
+
+v_mad_u32_u16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x59,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05]
+
+v_mad_u32_u16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x59,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05]
+
+v_mad_u32_u16_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x59,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05]
+
+v_mad_u32_u16_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x59,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05]
+
+v_mad_u32_u16_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x59,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05]
+
+v_mad_u32_u16_e64_dpp v5, v1, v2, null dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x59,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05]
+
+v_mad_u32_u16_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x59,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05]
+
+v_mad_u32_u16_e64_dpp v5, v1, v2, 0.5 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: [0x05,0x00,0x59,0xd6,0xea,0x04,0xc2,0x03,0x01,0x77,0x39,0x05]
+
+v_mad_u32_u16_e64_dpp v255, v255, v255, src_scc clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: [0xff,0x80,0x59,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00]
+
+v_mad_u32_u24_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x0b,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05]
+
+v_mad_u32_u24_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x0b,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05]
+
+v_mad_u32_u24_e64_dpp v5, v1, 10, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x0b,0xd6,0xe9,0x14,0x0d,0x04,0x01,0x77,0x39,0x05]
+
+v_mad_u32_u24_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x0b,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05]
+
+v_mad_u32_u24_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x0b,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05]
+
+v_mad_u32_u24_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x0b,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05]
+
+v_mad_u32_u24_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x0b,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05]
+
+v_mad_u32_u24_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x0b,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05]
+
+v_mad_u32_u24_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x0b,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05]
+
+v_mad_u32_u24_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x0b,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05]
+
+v_mad_u32_u24_e64_dpp v5, v1, v2, null dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x0b,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05]
+
+v_mad_u32_u24_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x0b,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05]
+
+v_mad_u32_u24_e64_dpp v5, v1, v2, 0.5 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: [0x05,0x00,0x0b,0xd6,0xea,0x04,0xc2,0x03,0x01,0x77,0x39,0x05]
+
+v_mad_u32_u24_e64_dpp v255, v255, v255, src_scc clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: [0xff,0x80,0x0b,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00]
+
+v_max3_num_f16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x2c,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05]
+
+v_max3_num_f16_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x2c,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05]
+
+v_max3_num_f16_e64_dpp v5, v1, 2.0, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x2c,0xd6,0xe9,0xe8,0x0d,0x04,0x01,0x77,0x39,0x05]
+
+v_max3_num_f16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x2c,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05]
+
+v_max3_num_f16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x2c,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05]
+
+v_max3_num_f16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x2c,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05]
+
+v_max3_num_f16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x2c,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05]
+
+v_max3_num_f16_e64_dpp v5, |v1|, v2, -ttmp15 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x01,0x2c,0xd6,0xe9,0x04,0xee,0x81,0x01,0x77,0x39,0x05]
+
+v_max3_num_f16_e64_dpp v5, v1, -|v2|, exec_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x02,0x2c,0xd6,0xe9,0x04,0xfe,0x41,0x01,0x77,0x39,0x05]
+
+v_max3_num_f16_e64_dpp v5, -v1, v2, |exec_lo| dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x04,0x2c,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05]
+
+v_max3_num_f16_e64_dpp v5, -|v1|, -|v2|, null dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x03,0x2c,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05]
+
+v_max3_num_f16_e64_dpp v5, -|v1|, v2, -|-1| dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x05,0x2c,0xd6,0xe9,0x04,0x06,0xa3,0x01,0x77,0x39,0x05]
+
+v_max3_num_f16_e64_dpp v5, v1, -|v2|, -|0.5| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: [0x05,0x06,0x2c,0xd6,0xea,0x04,0xc2,0xc3,0x01,0x77,0x39,0x05]
+
+v_max3_num_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp dpp8:[0,0,0,0,0,0,0,0]
+// GFX12: [0xff,0x87,0x2c,0xd6,0xe9,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00]
+
+v_max3_num_f32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x2a,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05]
+
+v_max3_num_f32_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x2a,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05]
+
+v_max3_num_f32_e64_dpp v5, v1, 2.0, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x2a,0xd6,0xe9,0xe8,0x0d,0x04,0x01,0x77,0x39,0x05]
+
+v_max3_num_f32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x2a,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05]
+
+v_max3_num_f32_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x2a,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05]
+
+v_max3_num_f32_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x2a,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05]
+
+v_max3_num_f32_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x2a,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05]
+
+v_max3_num_f32_e64_dpp v5, |v1|, v2, -ttmp15 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x01,0x2a,0xd6,0xe9,0x04,0xee,0x81,0x01,0x77,0x39,0x05]
+
+v_max3_num_f32_e64_dpp v5, v1, -|v2|, exec_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x02,0x2a,0xd6,0xe9,0x04,0xfe,0x41,0x01,0x77,0x39,0x05]
+
+v_max3_num_f32_e64_dpp v5, -v1, v2, |exec_lo| dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x04,0x2a,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05]
+
+v_max3_num_f32_e64_dpp v5, -|v1|, -|v2|, null dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x03,0x2a,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05]
+
+v_max3_num_f32_e64_dpp v5, -|v1|, v2, -|-1| mul:2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x05,0x2a,0xd6,0xe9,0x04,0x06,0xab,0x01,0x77,0x39,0x05]
+
+v_max3_num_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: [0x05,0x06,0x2a,0xd6,0xea,0x04,0xc2,0xd3,0x01,0x77,0x39,0x05]
+
+v_max3_num_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 dpp8:[0,0,0,0,0,0,0,0]
+// GFX12: [0xff,0x87,0x2a,0xd6,0xe9,0xfe,0xf7,0xfb,0xff,0x00,0x00,0x00]
+
+v_max3_i16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x4d,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05]
+
+v_max3_i16_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x4d,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05]
+
+v_max3_i16_e64_dpp v5, v1, 10, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x4d,0xd6,0xe9,0x14,0x0d,0x04,0x01,0x77,0x39,0x05]
+
+v_max3_i16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x4d,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05]
+
+v_max3_i16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x4d,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05]
+
+v_max3_i16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x4d,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05]
+
+v_max3_i16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x4d,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05]
+
+v_max3_i16_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x4d,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05]
+
+v_max3_i16_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x4d,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05]
+
+v_max3_i16_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x4d,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05]
+
+v_max3_i16_e64_dpp v5, v1, v2, null dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x4d,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05]
+
+v_max3_i16_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: [0x05,0x00,0x4d,0xd6,0xea,0x04,0x06,0x03,0x01,0x77,0x39,0x05]
+
+v_max3_i16_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: [0xff,0x00,0x4d,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00]
+
+v_max3_i32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x1d,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05]
+
+v_max3_i32_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x1d,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05]
+
+v_max3_i32_e64_dpp v5, v1, 10, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x1d,0xd6,0xe9,0x14,0x0d,0x04,0x01,0x77,0x39,0x05]
+
+v_max3_i32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x1d,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05]
+
+v_max3_i32_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x1d,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05]
+
+v_max3_i32_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x1d,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05]
+
+v_max3_i32_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x1d,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05]
+
+v_max3_i32_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x1d,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05]
+
+v_max3_i32_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x1d,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05]
+
+v_max3_i32_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x1d,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05]
+
+v_max3_i32_e64_dpp v5, v1, v2, null dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x1d,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05]
+
+v_max3_i32_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x1d,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05]
+
+v_max3_i32_e64_dpp v5, v1, v2, 0.5 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: [0x05,0x00,0x1d,0xd6,0xea,0x04,0xc2,0x03,0x01,0x77,0x39,0x05]
+
+v_max3_i32_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: [0xff,0x00,0x1d,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00]
+
+v_max3_u16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x4e,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05]
+
+v_max3_u16_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x4e,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05]
+
+v_max3_u16_e64_dpp v5, v1, 10, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x4e,0xd6,0xe9,0x14,0x0d,0x04,0x01,0x77,0x39,0x05]
+
+v_max3_u16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x4e,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05]
+
+v_max3_u16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x4e,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05]
+
+v_max3_u16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x4e,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05]
+
+v_max3_u16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x4e,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05]
+
+v_max3_u16_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x4e,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05]
+
+v_max3_u16_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x4e,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05]
+
+v_max3_u16_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x4e,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05]
+
+v_max3_u16_e64_dpp v5, v1, v2, null dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x4e,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05]
+
+v_max3_u16_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: [0x05,0x00,0x4e,0xd6,0xea,0x04,0x06,0x03,0x01,0x77,0x39,0x05]
+
+v_max3_u16_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: [0xff,0x00,0x4e,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00]
+
+v_max3_u32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x1e,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05]
+
+v_max3_u32_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x1e,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05]
+
+v_max3_u32_e64_dpp v5, v1, 10, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x1e,0xd6,0xe9,0x14,0x0d,0x04,0x01,0x77,0x39,0x05]
+
+v_max3_u32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x1e,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05]
+
+v_max3_u32_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x1e,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05]
+
+v_max3_u32_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x1e,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05]
+
+v_max3_u32_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x1e,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05]
+
+v_max3_u32_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x1e,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05]
+
+v_max3_u32_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x1e,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05]
+
+v_max3_u32_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x1e,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05]
+
+v_max3_u32_e64_dpp v5, v1, v2, null dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x1e,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05]
+
+v_max3_u32_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x1e,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05]
+
+v_max3_u32_e64_dpp v5, v1, v2, 0.5 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: [0x05,0x00,0x1e,0xd6,0xea,0x04,0xc2,0x03,0x01,0x77,0x39,0x05]
+
+v_max3_u32_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: [0xff,0x00,0x1e,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00]
+
+v_max_i16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x0a,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_max_i16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: [0x05,0x00,0x0a,0xd7,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_max_i16_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: [0xff,0x00,0x0a,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+
+v_max_u16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x09,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_max_u16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: [0x05,0x00,0x09,0xd7,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_max_u16_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: [0xff,0x00,0x09,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+
+v_maxmin_num_f16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x6b,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05]
+
+v_maxmin_num_f16_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x6b,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05]
+
+v_maxmin_num_f16_e64_dpp v5, v1, 2.0, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x6b,0xd6,0xe9,0xe8,0x0d,0x04,0x01,0x77,0x39,0x05]
+
+v_maxmin_num_f16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x6b,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05]
+
+v_maxmin_num_f16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x6b,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05]
+
+v_maxmin_num_f16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x6b,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05]
+
+v_maxmin_num_f16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x6b,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05]
+
+v_maxmin_num_f16_e64_dpp v5, |v1|, v2, -ttmp15 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x01,0x6b,0xd6,0xe9,0x04,0xee,0x81,0x01,0x77,0x39,0x05]
+
+v_maxmin_num_f16_e64_dpp v5, v1, -|v2|, exec_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x02,0x6b,0xd6,0xe9,0x04,0xfe,0x41,0x01,0x77,0x39,0x05]
+
+v_maxmin_num_f16_e64_dpp v5, -v1, v2, |exec_lo| dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x04,0x6b,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05]
+
+v_maxmin_num_f16_e64_dpp v5, -|v1|, -|v2|, null dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x03,0x6b,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05]
+
+v_maxmin_num_f16_e64_dpp v5, -|v1|, v2, -|-1| mul:2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x05,0x6b,0xd6,0xe9,0x04,0x06,0xab,0x01,0x77,0x39,0x05]
+
+v_maxmin_num_f16_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: [0x05,0x06,0x6b,0xd6,0xea,0x04,0xc2,0xd3,0x01,0x77,0x39,0x05]
+
+v_maxmin_num_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 dpp8:[0,0,0,0,0,0,0,0]
+// GFX12: [0xff,0x87,0x6b,0xd6,0xe9,0xfe,0xf7,0xfb,0xff,0x00,0x00,0x00]
+
+v_maxmin_num_f32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x69,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05]
+
+v_maxmin_num_f32_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x69,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05]
+
+v_maxmin_num_f32_e64_dpp v5, v1, 2.0, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x69,0xd6,0xe9,0xe8,0x0d,0x04,0x01,0x77,0x39,0x05]
+
+v_maxmin_num_f32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x69,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05]
+
+v_maxmin_num_f32_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x69,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05]
+
+v_maxmin_num_f32_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x69,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05]
+
+v_maxmin_num_f32_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x69,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05]
+
+v_maxmin_num_f32_e64_dpp v5, |v1|, v2, -ttmp15 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x01,0x69,0xd6,0xe9,0x04,0xee,0x81,0x01,0x77,0x39,0x05]
+
+v_maxmin_num_f32_e64_dpp v5, v1, -|v2|, exec_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x02,0x69,0xd6,0xe9,0x04,0xfe,0x41,0x01,0x77,0x39,0x05]
+
+v_maxmin_num_f32_e64_dpp v5, -v1, v2, |exec_lo| dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x04,0x69,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05]
+
+v_maxmin_num_f32_e64_dpp v5, -|v1|, -|v2|, null dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x03,0x69,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05]
+
+v_maxmin_num_f32_e64_dpp v5, -|v1|, v2, -|-1| mul:2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x05,0x69,0xd6,0xe9,0x04,0x06,0xab,0x01,0x77,0x39,0x05]
+
+v_maxmin_num_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: [0x05,0x06,0x69,0xd6,0xea,0x04,0xc2,0xd3,0x01,0x77,0x39,0x05]
+
+v_maxmin_num_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 dpp8:[0,0,0,0,0,0,0,0]
+// GFX12: [0xff,0x87,0x69,0xd6,0xe9,0xfe,0xf7,0xfb,0xff,0x00,0x00,0x00]
+
+v_maxmin_i32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x64,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05]
+
+v_maxmin_i32_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x64,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05]
+
+v_maxmin_i32_e64_dpp v5, v1, 10, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x64,0xd6,0xe9,0x14,0x0d,0x04,0x01,0x77,0x39,0x05]
+
+v_maxmin_i32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x64,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05]
+
+v_maxmin_i32_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x64,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05]
+
+v_maxmin_i32_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x64,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05]
+
+v_maxmin_i32_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x64,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05]
+
+v_maxmin_i32_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x64,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05]
+
+v_maxmin_i32_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x64,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05]
+
+v_maxmin_i32_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x64,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05]
+
+v_maxmin_i32_e64_dpp v5, v1, v2, null dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x64,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05]
+
+v_maxmin_i32_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x64,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05]
+
+v_maxmin_i32_e64_dpp v5, v1, v2, 0.5 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: [0x05,0x00,0x64,0xd6,0xea,0x04,0xc2,0x03,0x01,0x77,0x39,0x05]
+
+v_maxmin_i32_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: [0xff,0x00,0x64,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00]
+
+v_maxmin_u32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x62,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05]
+
+v_maxmin_u32_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x62,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05]
+
+v_maxmin_u32_e64_dpp v5, v1, 10, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x62,0xd6,0xe9,0x14,0x0d,0x04,0x01,0x77,0x39,0x05]
+
+v_maxmin_u32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x62,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05]
+
+v_maxmin_u32_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x62,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05]
+
+v_maxmin_u32_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x62,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05]
+
+v_maxmin_u32_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x62,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05]
+
+v_maxmin_u32_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x62,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05]
+
+v_maxmin_u32_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x62,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05]
+
+v_maxmin_u32_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x62,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05]
+
+v_maxmin_u32_e64_dpp v5, v1, v2, null dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x62,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05]
+
+v_maxmin_u32_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x62,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05]
+
+v_maxmin_u32_e64_dpp v5, v1, v2, 0.5 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: [0x05,0x00,0x62,0xd6,0xea,0x04,0xc2,0x03,0x01,0x77,0x39,0x05]
+
+v_maxmin_u32_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: [0xff,0x00,0x62,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00]
+
+v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x20,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: [0x05,0x00,0x20,0xd7,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_mbcnt_hi_u32_b32_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: [0xff,0x00,0x20,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+
+v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x1f,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: [0x05,0x00,0x1f,0xd7,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_mbcnt_lo_u32_b32_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: [0xff,0x00,0x1f,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+
+v_med3_num_f16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x32,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05]
+
+v_med3_num_f16_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x32,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05]
+
+v_med3_num_f16_e64_dpp v5, v1, 2.0, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x32,0xd6,0xe9,0xe8,0x0d,0x04,0x01,0x77,0x39,0x05]
+
+v_med3_num_f16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x32,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05]
+
+v_med3_num_f16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x32,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05]
+
+v_med3_num_f16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x32,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05]
+
+v_med3_num_f16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x32,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05]
+
+v_med3_num_f16_e64_dpp v5, |v1|, v2, -ttmp15 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x01,0x32,0xd6,0xe9,0x04,0xee,0x81,0x01,0x77,0x39,0x05]
+
+v_med3_num_f16_e64_dpp v5, v1, -|v2|, exec_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x02,0x32,0xd6,0xe9,0x04,0xfe,0x41,0x01,0x77,0x39,0x05]
+
+v_med3_num_f16_e64_dpp v5, -v1, v2, |exec_lo| dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x04,0x32,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05]
+
+v_med3_num_f16_e64_dpp v5, -|v1|, -|v2|, null dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x03,0x32,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05]
+
+v_med3_num_f16_e64_dpp v5, -|v1|, v2, -|-1| dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x05,0x32,0xd6,0xe9,0x04,0x06,0xa3,0x01,0x77,0x39,0x05]
+
+v_med3_num_f16_e64_dpp v5, v1, -|v2|, -|0.5| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: [0x05,0x06,0x32,0xd6,0xea,0x04,0xc2,0xc3,0x01,0x77,0x39,0x05]
+
+v_med3_num_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp dpp8:[0,0,0,0,0,0,0,0]
+// GFX12: [0xff,0x87,0x32,0xd6,0xe9,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00]
+
+v_med3_num_f32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x31,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05]
+
+v_med3_num_f32_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x31,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05]
+
+v_med3_num_f32_e64_dpp v5, v1, 2.0, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x31,0xd6,0xe9,0xe8,0x0d,0x04,0x01,0x77,0x39,0x05]
+
+v_med3_num_f32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x31,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05]
+
+v_med3_num_f32_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x31,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05]
+
+v_med3_num_f32_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x31,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05]
+
+v_med3_num_f32_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x31,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05]
+
+v_med3_num_f32_e64_dpp v5, |v1|, v2, -ttmp15 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x01,0x31,0xd6,0xe9,0x04,0xee,0x81,0x01,0x77,0x39,0x05]
+
+v_med3_num_f32_e64_dpp v5, v1, -|v2|, exec_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x02,0x31,0xd6,0xe9,0x04,0xfe,0x41,0x01,0x77,0x39,0x05]
+
+v_med3_num_f32_e64_dpp v5, -v1, v2, |exec_lo| dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x04,0x31,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05]
+
+v_med3_num_f32_e64_dpp v5, -|v1|, -|v2|, null dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x03,0x31,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05]
+
+v_med3_num_f32_e64_dpp v5, -|v1|, v2, -|-1| mul:2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x05,0x31,0xd6,0xe9,0x04,0x06,0xab,0x01,0x77,0x39,0x05]
+
+v_med3_num_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: [0x05,0x06,0x31,0xd6,0xea,0x04,0xc2,0xd3,0x01,0x77,0x39,0x05]
+
+v_med3_num_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 dpp8:[0,0,0,0,0,0,0,0]
+// GFX12: [0xff,0x87,0x31,0xd6,0xe9,0xfe,0xf7,0xfb,0xff,0x00,0x00,0x00]
+
+v_med3_i16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x50,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05]
+
+v_med3_i16_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x50,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05]
+
+v_med3_i16_e64_dpp v5, v1, 10, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x50,0xd6,0xe9,0x14,0x0d,0x04,0x01,0x77,0x39,0x05]
+
+v_med3_i16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x50,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05]
+
+v_med3_i16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x50,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05]
+
+v_med3_i16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x50,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05]
+
+v_med3_i16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x50,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05]
+
+v_med3_i16_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x50,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05]
+
+v_med3_i16_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x50,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05]
+
+v_med3_i16_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x50,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05]
+
+v_med3_i16_e64_dpp v5, v1, v2, null dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x50,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05]
+
+v_med3_i16_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: [0x05,0x00,0x50,0xd6,0xea,0x04,0x06,0x03,0x01,0x77,0x39,0x05]
+
+v_med3_i16_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: [0xff,0x00,0x50,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00]
+
+v_med3_i32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x20,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05]
+
+v_med3_i32_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x20,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05]
+
+v_med3_i32_e64_dpp v5, v1, 10, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x20,0xd6,0xe9,0x14,0x0d,0x04,0x01,0x77,0x39,0x05]
+
+v_med3_i32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x20,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05]
+
+v_med3_i32_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x20,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05]
+
+v_med3_i32_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x20,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05]
+
+v_med3_i32_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x20,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05]
+
+v_med3_i32_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x20,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05]
+
+v_med3_i32_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x20,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05]
+
+v_med3_i32_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x20,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05]
+
+v_med3_i32_e64_dpp v5, v1, v2, null dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x20,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05]
+
+v_med3_i32_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x20,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05]
+
+v_med3_i32_e64_dpp v5, v1, v2, 0.5 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: [0x05,0x00,0x20,0xd6,0xea,0x04,0xc2,0x03,0x01,0x77,0x39,0x05]
v5, v1, v2, 0.5 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x05,0x00,0x20,0xd6,0xea,0x04,0xc2,0x03,0x01,0x77,0x39,0x05] + +v_med3_i32_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: [0xff,0x00,0x20,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] + +v_med3_u16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x51,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +v_med3_u16_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x51,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + +v_med3_u16_e64_dpp v5, v1, 10, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x51,0xd6,0xe9,0x14,0x0d,0x04,0x01,0x77,0x39,0x05] + +v_med3_u16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x51,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +v_med3_u16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x51,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] + +v_med3_u16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x51,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] + +v_med3_u16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x51,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] + +v_med3_u16_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x51,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] + +v_med3_u16_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x51,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] + +v_med3_u16_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x51,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] + +v_med3_u16_e64_dpp v5, v1, v2, null dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x51,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] + +v_med3_u16_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x05,0x00,0x51,0xd6,0xea,0x04,0x06,0x03,0x01,0x77,0x39,0x05] + +v_med3_u16_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: [0xff,0x00,0x51,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] + +v_med3_u32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x21,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +v_med3_u32_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x21,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + +v_med3_u32_e64_dpp v5, v1, 10, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x21,0xd6,0xe9,0x14,0x0d,0x04,0x01,0x77,0x39,0x05] + +v_med3_u32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x21,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +v_med3_u32_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x21,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] + +v_med3_u32_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x21,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] + +v_med3_u32_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x21,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] + +v_med3_u32_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x21,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] + +v_med3_u32_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x21,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] + +v_med3_u32_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x21,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] + +v_med3_u32_e64_dpp v5, v1, v2, null dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x21,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] + 
+v_med3_u32_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x21,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] + +v_med3_u32_e64_dpp v5, v1, v2, 0.5 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x05,0x00,0x21,0xd6,0xea,0x04,0xc2,0x03,0x01,0x77,0x39,0x05] + +v_med3_u32_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: [0xff,0x00,0x21,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] + +v_min3_num_f16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x2b,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +v_min3_num_f16_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x2b,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + +v_min3_num_f16_e64_dpp v5, v1, 2.0, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x2b,0xd6,0xe9,0xe8,0x0d,0x04,0x01,0x77,0x39,0x05] + +v_min3_num_f16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x2b,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +v_min3_num_f16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x2b,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] + +v_min3_num_f16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x2b,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] + +v_min3_num_f16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x2b,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] + +v_min3_num_f16_e64_dpp v5, |v1|, v2, -ttmp15 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x01,0x2b,0xd6,0xe9,0x04,0xee,0x81,0x01,0x77,0x39,0x05] + +v_min3_num_f16_e64_dpp v5, v1, -|v2|, exec_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x02,0x2b,0xd6,0xe9,0x04,0xfe,0x41,0x01,0x77,0x39,0x05] + +v_min3_num_f16_e64_dpp v5, -v1, v2, |exec_lo| dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x04,0x2b,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05] + +v_min3_num_f16_e64_dpp v5, -|v1|, -|v2|, null dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x03,0x2b,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05] + +v_min3_num_f16_e64_dpp v5, -|v1|, v2, -|-1| dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x05,0x2b,0xd6,0xe9,0x04,0x06,0xa3,0x01,0x77,0x39,0x05] + +v_min3_num_f16_e64_dpp v5, v1, -|v2|, -|0.5| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x05,0x06,0x2b,0xd6,0xea,0x04,0xc2,0xc3,0x01,0x77,0x39,0x05] + +v_min3_num_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp dpp8:[0,0,0,0,0,0,0,0] +// GFX12: [0xff,0x87,0x2b,0xd6,0xe9,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00] + +v_min3_num_f32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x29,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +v_min3_num_f32_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x29,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + +v_min3_num_f32_e64_dpp v5, v1, 2.0, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x29,0xd6,0xe9,0xe8,0x0d,0x04,0x01,0x77,0x39,0x05] + +v_min3_num_f32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x29,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +v_min3_num_f32_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x29,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] + +v_min3_num_f32_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x29,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] + +v_min3_num_f32_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x29,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] + +v_min3_num_f32_e64_dpp v5, |v1|, v2, -ttmp15 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x01,0x29,0xd6,0xe9,0x04,0xee,0x81,0x01,0x77,0x39,0x05] + 
+v_min3_num_f32_e64_dpp v5, v1, -|v2|, exec_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x02,0x29,0xd6,0xe9,0x04,0xfe,0x41,0x01,0x77,0x39,0x05] + +v_min3_num_f32_e64_dpp v5, -v1, v2, |exec_lo| dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x04,0x29,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05] + +v_min3_num_f32_e64_dpp v5, -|v1|, -|v2|, null dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x03,0x29,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05] + +v_min3_num_f32_e64_dpp v5, -|v1|, v2, -|-1| mul:2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x05,0x29,0xd6,0xe9,0x04,0x06,0xab,0x01,0x77,0x39,0x05] + +v_min3_num_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x05,0x06,0x29,0xd6,0xea,0x04,0xc2,0xd3,0x01,0x77,0x39,0x05] + +v_min3_num_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] +// GFX12: [0xff,0x87,0x29,0xd6,0xe9,0xfe,0xf7,0xfb,0xff,0x00,0x00,0x00] + +v_min3_i16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x4a,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +v_min3_i16_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x4a,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + +v_min3_i16_e64_dpp v5, v1, 10, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x4a,0xd6,0xe9,0x14,0x0d,0x04,0x01,0x77,0x39,0x05] + +v_min3_i16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x4a,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +v_min3_i16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x4a,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] + +v_min3_i16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x4a,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] + +v_min3_i16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x4a,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] + +v_min3_i16_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x4a,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] + +v_min3_i16_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x4a,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] + +v_min3_i16_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x4a,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] + +v_min3_i16_e64_dpp v5, v1, v2, null dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x4a,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] + +v_min3_i16_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x05,0x00,0x4a,0xd6,0xea,0x04,0x06,0x03,0x01,0x77,0x39,0x05] + +v_min3_i16_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: [0xff,0x00,0x4a,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] + +v_min3_i32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x1a,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +v_min3_i32_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x1a,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + +v_min3_i32_e64_dpp v5, v1, 10, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x1a,0xd6,0xe9,0x14,0x0d,0x04,0x01,0x77,0x39,0x05] + +v_min3_i32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x1a,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +v_min3_i32_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x1a,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] + +v_min3_i32_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x1a,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] + +v_min3_i32_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] 
+// GFX12: [0x05,0x00,0x1a,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] + +v_min3_i32_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x1a,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] + +v_min3_i32_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x1a,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] + +v_min3_i32_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x1a,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] + +v_min3_i32_e64_dpp v5, v1, v2, null dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x1a,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] + +v_min3_i32_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x1a,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] + +v_min3_i32_e64_dpp v5, v1, v2, 0.5 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x05,0x00,0x1a,0xd6,0xea,0x04,0xc2,0x03,0x01,0x77,0x39,0x05] + +v_min3_i32_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: [0xff,0x00,0x1a,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] + +v_min3_u16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x4b,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +v_min3_u16_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x4b,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + +v_min3_u16_e64_dpp v5, v1, 10, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x4b,0xd6,0xe9,0x14,0x0d,0x04,0x01,0x77,0x39,0x05] + +v_min3_u16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x4b,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +v_min3_u16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x4b,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] + +v_min3_u16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x4b,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] + +v_min3_u16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x4b,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] + +v_min3_u16_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x4b,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] + +v_min3_u16_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x4b,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] + +v_min3_u16_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x4b,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] + +v_min3_u16_e64_dpp v5, v1, v2, null dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x4b,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] + +v_min3_u16_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x05,0x00,0x4b,0xd6,0xea,0x04,0x06,0x03,0x01,0x77,0x39,0x05] + +v_min3_u16_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: [0xff,0x00,0x4b,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] + +v_min3_u32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x1b,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +v_min3_u32_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x1b,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + +v_min3_u32_e64_dpp v5, v1, 10, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x1b,0xd6,0xe9,0x14,0x0d,0x04,0x01,0x77,0x39,0x05] + +v_min3_u32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x1b,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +v_min3_u32_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x1b,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] + +v_min3_u32_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] 
+// GFX12: [0x05,0x00,0x1b,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] + +v_min3_u32_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x1b,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] + +v_min3_u32_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x1b,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] + +v_min3_u32_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x1b,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] + +v_min3_u32_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x1b,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] + +v_min3_u32_e64_dpp v5, v1, v2, null dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x1b,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] + +v_min3_u32_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x1b,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] + +v_min3_u32_e64_dpp v5, v1, v2, 0.5 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x05,0x00,0x1b,0xd6,0xea,0x04,0xc2,0x03,0x01,0x77,0x39,0x05] + +v_min3_u32_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: [0xff,0x00,0x1b,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] + +v_min_i16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x0c,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_min_i16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x05,0x00,0x0c,0xd7,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_min_i16_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: [0xff,0x00,0x0c,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] + +v_min_u16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x0b,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_min_u16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x05,0x00,0x0b,0xd7,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_min_u16_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: [0xff,0x00,0x0b,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] + +v_minmax_num_f16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x6a,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +v_minmax_num_f16_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x6a,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + +v_minmax_num_f16_e64_dpp v5, v1, 2.0, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x6a,0xd6,0xe9,0xe8,0x0d,0x04,0x01,0x77,0x39,0x05] + +v_minmax_num_f16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x6a,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +v_minmax_num_f16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x6a,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] + +v_minmax_num_f16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x6a,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] + +v_minmax_num_f16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x6a,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] + +v_minmax_num_f16_e64_dpp v5, |v1|, v2, -ttmp15 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x01,0x6a,0xd6,0xe9,0x04,0xee,0x81,0x01,0x77,0x39,0x05] + +v_minmax_num_f16_e64_dpp v5, v1, -|v2|, exec_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x02,0x6a,0xd6,0xe9,0x04,0xfe,0x41,0x01,0x77,0x39,0x05] + +v_minmax_num_f16_e64_dpp v5, -v1, v2, |exec_lo| dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x04,0x6a,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05] + +v_minmax_num_f16_e64_dpp v5, -|v1|, -|v2|, null dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x03,0x6a,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05] + 
+v_minmax_num_f16_e64_dpp v5, -|v1|, v2, -|-1| mul:2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x05,0x6a,0xd6,0xe9,0x04,0x06,0xab,0x01,0x77,0x39,0x05] + +v_minmax_num_f16_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x05,0x06,0x6a,0xd6,0xea,0x04,0xc2,0xd3,0x01,0x77,0x39,0x05] + +v_minmax_num_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] +// GFX12: [0xff,0x87,0x6a,0xd6,0xe9,0xfe,0xf7,0xfb,0xff,0x00,0x00,0x00] + +v_minmax_num_f32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x68,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +v_minmax_num_f32_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x68,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + +v_minmax_num_f32_e64_dpp v5, v1, 2.0, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x68,0xd6,0xe9,0xe8,0x0d,0x04,0x01,0x77,0x39,0x05] + +v_minmax_num_f32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x68,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +v_minmax_num_f32_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x68,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] + +v_minmax_num_f32_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x68,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] + +v_minmax_num_f32_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x68,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] + +v_minmax_num_f32_e64_dpp v5, |v1|, v2, -ttmp15 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x01,0x68,0xd6,0xe9,0x04,0xee,0x81,0x01,0x77,0x39,0x05] + +v_minmax_num_f32_e64_dpp v5, v1, -|v2|, exec_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x02,0x68,0xd6,0xe9,0x04,0xfe,0x41,0x01,0x77,0x39,0x05] + +v_minmax_num_f32_e64_dpp v5, -v1, v2, |exec_lo| dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x04,0x68,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05] + +v_minmax_num_f32_e64_dpp v5, -|v1|, -|v2|, null dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x03,0x68,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05] + +v_minmax_num_f32_e64_dpp v5, -|v1|, v2, -|-1| mul:2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x05,0x68,0xd6,0xe9,0x04,0x06,0xab,0x01,0x77,0x39,0x05] + +v_minmax_num_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x05,0x06,0x68,0xd6,0xea,0x04,0xc2,0xd3,0x01,0x77,0x39,0x05] + +v_minmax_num_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] +// GFX12: [0xff,0x87,0x68,0xd6,0xe9,0xfe,0xf7,0xfb,0xff,0x00,0x00,0x00] + +v_minmax_i32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x65,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +v_minmax_i32_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x65,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + +v_minmax_i32_e64_dpp v5, v1, 10, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x65,0xd6,0xe9,0x14,0x0d,0x04,0x01,0x77,0x39,0x05] + +v_minmax_i32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x65,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +v_minmax_i32_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x65,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] + +v_minmax_i32_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x65,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] + +v_minmax_i32_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x65,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] + +v_minmax_i32_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] 
+// GFX12: [0x05,0x00,0x65,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] + +v_minmax_i32_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x65,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] + +v_minmax_i32_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x65,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] + +v_minmax_i32_e64_dpp v5, v1, v2, null dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x65,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] + +v_minmax_i32_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x65,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] + +v_minmax_i32_e64_dpp v5, v1, v2, 0.5 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x05,0x00,0x65,0xd6,0xea,0x04,0xc2,0x03,0x01,0x77,0x39,0x05] + +v_minmax_i32_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: [0xff,0x00,0x65,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] + +v_minmax_u32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x63,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +v_minmax_u32_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x63,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + +v_minmax_u32_e64_dpp v5, v1, 10, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x63,0xd6,0xe9,0x14,0x0d,0x04,0x01,0x77,0x39,0x05] + +v_minmax_u32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x63,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +v_minmax_u32_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x63,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] + +v_minmax_u32_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x63,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] + +v_minmax_u32_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x63,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] + +v_minmax_u32_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x63,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] + +v_minmax_u32_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x63,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] + +v_minmax_u32_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x63,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] + +v_minmax_u32_e64_dpp v5, v1, v2, null dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x63,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] + +v_minmax_u32_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x63,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] + +v_minmax_u32_e64_dpp v5, v1, v2, 0.5 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x05,0x00,0x63,0xd6,0xea,0x04,0xc2,0x03,0x01,0x77,0x39,0x05] + +v_minmax_u32_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: [0xff,0x00,0x63,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] + +v_msad_u8_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x39,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +v_msad_u8_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x39,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + +v_msad_u8_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x39,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +v_msad_u8_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x39,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] + +v_msad_u8_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x39,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] + +v_msad_u8_e64_dpp v5, v1, 
v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x39,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] + +v_msad_u8_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x39,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] + +v_msad_u8_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x39,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] + +v_msad_u8_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x39,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] + +v_msad_u8_e64_dpp v5, v1, v2, null dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x39,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] + +v_msad_u8_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x39,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] + +v_msad_u8_e64_dpp v5, v1, v2, 0.5 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x05,0x00,0x39,0xd6,0xea,0x04,0xc2,0x03,0x01,0x77,0x39,0x05] + +v_msad_u8_e64_dpp v255, v255, v255, src_scc clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: [0xff,0x80,0x39,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] + +v_mul_lo_u16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x05,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_mul_lo_u16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x05,0x00,0x05,0xd7,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_mul_lo_u16_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: [0xff,0x00,0x05,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] + +v_mullit_f32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x18,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +v_mullit_f32_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x18,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + +v_mullit_f32_e64_dpp v5, v1, 2.0, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x18,0xd6,0xe9,0xe8,0x0d,0x04,0x01,0x77,0x39,0x05] + +v_mullit_f32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x18,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +v_mullit_f32_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x18,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] + +v_mullit_f32_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x18,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] + +v_mullit_f32_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x18,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] + +v_mullit_f32_e64_dpp v5, |v1|, v2, -ttmp15 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x01,0x18,0xd6,0xe9,0x04,0xee,0x81,0x01,0x77,0x39,0x05] + +v_mullit_f32_e64_dpp v5, v1, -|v2|, exec_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x02,0x18,0xd6,0xe9,0x04,0xfe,0x41,0x01,0x77,0x39,0x05] + +v_mullit_f32_e64_dpp v5, -v1, v2, |exec_lo| dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x04,0x18,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05] + +v_mullit_f32_e64_dpp v5, -|v1|, -|v2|, null dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x03,0x18,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05] + +v_mullit_f32_e64_dpp v5, -|v1|, v2, -|-1| mul:2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x05,0x18,0xd6,0xe9,0x04,0x06,0xab,0x01,0x77,0x39,0x05] + +v_mullit_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x05,0x06,0x18,0xd6,0xea,0x04,0xc2,0xd3,0x01,0x77,0x39,0x05] + +v_mullit_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: [0xff,0x87,0x18,0xd6,0xe9,0xfe,0xf7,0xfb,0xff,0x00,0x00,0x00] + +v_or3_b32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: 
[0x05,0x00,0x58,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +v_or3_b32_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x58,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + +v_or3_b32_e64_dpp v5, v1, 10, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x58,0xd6,0xe9,0x14,0x0d,0x04,0x01,0x77,0x39,0x05] + +v_or3_b32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x58,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +v_or3_b32_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x58,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] + +v_or3_b32_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x58,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] + +v_or3_b32_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x58,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] + +v_or3_b32_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x58,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] + +v_or3_b32_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x58,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] + +v_or3_b32_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x58,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] + +v_or3_b32_e64_dpp v5, v1, v2, null dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x58,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] + +v_or3_b32_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x58,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] + +v_or3_b32_e64_dpp v5, v1, v2, 0.5 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x05,0x00,0x58,0xd6,0xea,0x04,0xc2,0x03,0x01,0x77,0x39,0x05] + +v_or3_b32_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: [0xff,0x00,0x58,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] + +v_or_b16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x63,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_or_b16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x05,0x00,0x63,0xd7,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_or_b16_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: [0xff,0x00,0x63,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] + +v_pack_b32_f16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x11,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_pack_b32_f16_e64_dpp v5, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x01,0x11,0xd7,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] + +v_pack_b32_f16_e64_dpp v5, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x05,0x02,0x11,0xd7,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05] + +v_pack_b32_f16_e64_dpp v255, -|v255|, -|v255| dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: [0xff,0x03,0x11,0xd7,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] + +v_perm_b32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x44,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +v_perm_b32_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x44,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + +v_perm_b32_e64_dpp v5, v1, 10, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x44,0xd6,0xe9,0x14,0x0d,0x04,0x01,0x77,0x39,0x05] + +v_perm_b32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x44,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +v_perm_b32_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x44,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] + +v_perm_b32_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX12: 
[0x05,0x00,0x44,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] + +v_perm_b32_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x44,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] + +v_perm_b32_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x44,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] + +v_perm_b32_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x44,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] + +v_perm_b32_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x44,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] + +v_perm_b32_e64_dpp v5, v1, v2, null dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x44,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] + +v_perm_b32_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x44,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] + +v_perm_b32_e64_dpp v5, v1, v2, 0.5 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x05,0x00,0x44,0xd6,0xea,0x04,0xc2,0x03,0x01,0x77,0x39,0x05] + +v_perm_b32_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: [0xff,0x00,0x44,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] + +v_sad_hi_u8_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x23,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +v_sad_hi_u8_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x23,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + +v_sad_hi_u8_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x23,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +v_sad_hi_u8_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x23,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] + +v_sad_hi_u8_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x23,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] + +v_sad_hi_u8_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x23,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] + +v_sad_hi_u8_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x23,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] + +v_sad_hi_u8_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x23,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] + +v_sad_hi_u8_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x23,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] + +v_sad_hi_u8_e64_dpp v5, v1, v2, null dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x23,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] + +v_sad_hi_u8_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x23,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] + +v_sad_hi_u8_e64_dpp v5, v1, v2, 0.5 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x05,0x00,0x23,0xd6,0xea,0x04,0xc2,0x03,0x01,0x77,0x39,0x05] + +v_sad_hi_u8_e64_dpp v255, v255, v255, src_scc clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: [0xff,0x80,0x23,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] + +v_sad_u16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x24,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +v_sad_u16_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x24,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + +v_sad_u16_e64_dpp v5, v1, 10, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x24,0xd6,0xe9,0x14,0x0d,0x04,0x01,0x77,0x39,0x05] + +v_sad_u16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x24,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +v_sad_u16_e64_dpp v5, v1, v2, s105 
dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x24,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] + +v_sad_u16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x24,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] + +v_sad_u16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x24,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] + +v_sad_u16_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x24,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] + +v_sad_u16_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x24,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] + +v_sad_u16_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x24,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] + +v_sad_u16_e64_dpp v5, v1, v2, null dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x24,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] + +v_sad_u16_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x24,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] + +v_sad_u16_e64_dpp v5, v1, v2, 0.5 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x05,0x00,0x24,0xd6,0xea,0x04,0xc2,0x03,0x01,0x77,0x39,0x05] + +v_sad_u16_e64_dpp v255, v255, v255, src_scc clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: [0xff,0x80,0x24,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] + +v_sad_u32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x25,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +v_sad_u32_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x25,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + +v_sad_u32_e64_dpp v5, v1, 10, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x25,0xd6,0xe9,0x14,0x0d,0x04,0x01,0x77,0x39,0x05] + +v_sad_u32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x25,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +v_sad_u32_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x25,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] + +v_sad_u32_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x25,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] + +v_sad_u32_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x25,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] + +v_sad_u32_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x25,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] + +v_sad_u32_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x25,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] + +v_sad_u32_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x25,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] + +v_sad_u32_e64_dpp v5, v1, v2, null dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x25,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] + +v_sad_u32_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x25,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] + +v_sad_u32_e64_dpp v5, v1, v2, 0.5 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x05,0x00,0x25,0xd6,0xea,0x04,0xc2,0x03,0x01,0x77,0x39,0x05] + +v_sad_u32_e64_dpp v255, v255, v255, src_scc clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: [0xff,0x80,0x25,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] + +v_sad_u8_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x22,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +v_sad_u8_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x22,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + +v_sad_u8_e64_dpp v5, v1, v2, v255 
dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x22,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +v_sad_u8_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x22,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] + +v_sad_u8_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x22,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] + +v_sad_u8_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x22,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] + +v_sad_u8_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x22,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] + +v_sad_u8_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x22,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] + +v_sad_u8_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x22,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] + +v_sad_u8_e64_dpp v5, v1, v2, null dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x22,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] + +v_sad_u8_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x22,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] + +v_sad_u8_e64_dpp v5, v1, v2, 0.5 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x05,0x00,0x22,0xd6,0xea,0x04,0xc2,0x03,0x01,0x77,0x39,0x05] + +v_sad_u8_e64_dpp v255, v255, v255, src_scc clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: [0xff,0x80,0x22,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] + +v_sub_co_u32_e64_dpp v5, s6, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: [0x05,0x06,0x01,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_u32_e64_dpp v5, s6, v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W32: [0x05,0x06,0x01,0xd7,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_u32_e64_dpp v5, s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: [0x05,0x69,0x01,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_u32_e64_dpp v5, vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: [0x05,0x6a,0x01,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_u32_e64_dpp v5, vcc_hi, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: [0x05,0x6b,0x01,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_u32_e64_dpp v5, ttmp15, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W32: [0x05,0x7b,0x01,0xd7,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_u32_e64_dpp v5, s[12:13], v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: [0x05,0x0c,0x01,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_u32_e64_dpp v5, s[12:13], v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W64: [0x05,0x0c,0x01,0xd7,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_u32_e64_dpp v5, s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: [0x05,0x68,0x01,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_u32_e64_dpp v5, vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: [0x05,0x6a,0x01,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_u32_e64_dpp v5, ttmp[14:15], v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W64: [0x05,0x7a,0x01,0xd7,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_u32_e64_dpp v255, null, v255, v255 clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: [0xff,0xfc,0x01,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] + +v_sub_nc_i16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_sub_nc_i16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x05,0x00,0x0e,0xd7,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_sub_nc_i16_e64_dpp v255, v255, v255 clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: [0xff,0x80,0x0e,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] + +v_sub_nc_i32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x25,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_sub_nc_i32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x05,0x00,0x25,0xd7,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_sub_nc_i32_e64_dpp v255, v255, v255 clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: [0xff,0x80,0x25,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] + +v_sub_nc_u16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_sub_nc_u16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x05,0x00,0x04,0xd7,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_sub_nc_u16_e64_dpp v255, v255, v255 clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: [0xff,0x80,0x04,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] + +v_subrev_co_u32_e64_dpp v5, s6, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: [0x05,0x06,0x02,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_u32_e64_dpp v5, s6, v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W32: [0x05,0x06,0x02,0xd7,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_u32_e64_dpp v5, s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: [0x05,0x69,0x02,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_u32_e64_dpp v5, vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: [0x05,0x6a,0x02,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_u32_e64_dpp v5, vcc_hi, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: [0x05,0x6b,0x02,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_u32_e64_dpp v5, ttmp15, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W32: [0x05,0x7b,0x02,0xd7,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_u32_e64_dpp v5, s[12:13], v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: [0x05,0x0c,0x02,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_u32_e64_dpp v5, s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: [0x05,0x68,0x02,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_u32_e64_dpp v5, s[104:105], v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W64: 
[0x05,0x68,0x02,0xd7,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_u32_e64_dpp v5, vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: [0x05,0x6a,0x02,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_u32_e64_dpp v5, ttmp[14:15], v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W64: [0x05,0x7a,0x02,0xd7,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_u32_e64_dpp v255, null, v255, v255 clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: [0xff,0xfc,0x02,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] + +v_xad_u32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x45,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +v_xad_u32_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x45,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + +v_xad_u32_e64_dpp v5, v1, 10, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x45,0xd6,0xe9,0x14,0x0d,0x04,0x01,0x77,0x39,0x05] + +v_xad_u32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x45,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +v_xad_u32_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x45,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] + +v_xad_u32_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x45,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] + +v_xad_u32_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x45,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] + +v_xad_u32_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x45,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] + +v_xad_u32_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x45,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] + +v_xad_u32_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x45,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] + +v_xad_u32_e64_dpp v5, v1, v2, null dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x45,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] + +v_xad_u32_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x45,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] + +v_xad_u32_e64_dpp v5, v1, v2, 0.5 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x05,0x00,0x45,0xd6,0xea,0x04,0xc2,0x03,0x01,0x77,0x39,0x05] + +v_xad_u32_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: [0xff,0x00,0x45,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] + +v_xor3_b32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x40,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +v_xor3_b32_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x40,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + +v_xor3_b32_e64_dpp v5, v1, 10, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x40,0xd6,0xe9,0x14,0x0d,0x04,0x01,0x77,0x39,0x05] + +v_xor3_b32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x40,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +v_xor3_b32_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x40,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] + +v_xor3_b32_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x40,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] + +v_xor3_b32_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX12: 
[0x05,0x00,0x40,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] + +v_xor3_b32_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x40,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] + +v_xor3_b32_e64_dpp v5, v1, v2, exec_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x40,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] + +v_xor3_b32_e64_dpp v5, v1, v2, exec_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x40,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05] + +v_xor3_b32_e64_dpp v5, v1, v2, null dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x40,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] + +v_xor3_b32_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x40,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05] + +v_xor3_b32_e64_dpp v5, v1, v2, 0.5 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x05,0x00,0x40,0xd6,0xea,0x04,0xc2,0x03,0x01,0x77,0x39,0x05] + +v_xor3_b32_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: [0xff,0x00,0x40,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] + +v_xor_b16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x64,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_xor_b16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x05,0x00,0x64,0xd7,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_xor_b16_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: [0xff,0x00,0x64,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] + +v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x58,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x08,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x10,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_add_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 +// GFX12: [0xff,0xc0,0x0d,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] + +v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x58,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x08,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x10,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_add_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 +// GFX12: [0xff,0xc0,0x03,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] + +v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x0a,0x12,0xd7,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05] + +v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] dpp8:[0,0,0,0,0,0,0,0] fi:1 +// GFX12: [0xff,0x13,0x12,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] + +v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x0a,0x13,0xd7,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05] + +v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] dpp8:[0,0,0,0,0,0,0,0] fi:1 +// GFX12: [0xff,0x13,0x13,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] + +v_div_fixup_f16_e64_dpp v5, -v1, v2, |exec_lo| op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x7c,0x54,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05] + +v_div_fixup_f16_e64_dpp v5, -|v1|, -|v2|, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] +// 
GFX12: [0x05,0x0b,0x54,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05]
+
+v_div_fixup_f16_e64_dpp v5, -|v1|, v2, -|-1| op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x15,0x54,0xd6,0xe9,0x04,0x06,0xa3,0x01,0x77,0x39,0x05]
+
+v_div_fixup_f16_e64_dpp v5, v1, -|v2|, -|0.5| op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x26,0x54,0xd6,0xe9,0x04,0xc2,0xc3,0x01,0x77,0x39,0x05]
+
+v_div_fixup_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1
+// GFX12: [0xff,0xc7,0x54,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00]
+
+v_fma_f16_e64_dpp v5, -v1, v2, |exec_lo| op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x7c,0x48,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05]
+
+v_fma_f16_e64_dpp v5, -|v1|, -|v2|, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x0b,0x48,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05]
+
+v_fma_f16_e64_dpp v5, -|v1|, v2, -|-1| op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x15,0x48,0xd6,0xe9,0x04,0x06,0xa3,0x01,0x77,0x39,0x05]
+
+v_fma_f16_e64_dpp v5, v1, -|v2|, -|0.5| op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x26,0x48,0xd6,0xe9,0x04,0xc2,0xc3,0x01,0x77,0x39,0x05]
+
+v_fma_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1
+// GFX12: [0xff,0xc7,0x48,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00]
+
+v_mad_i16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x78,0x53,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05]
+
+v_mad_i16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x08,0x53,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05]
+
+v_mad_i16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x10,0x53,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05]
+
+v_mad_i16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x20,0x53,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05]
+
+v_mad_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1
+// GFX12: [0xff,0xc0,0x53,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00]
+
+v_mad_i32_i16_e64_dpp v5, v1, v2, 0.5 op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x08,0x5a,0xd6,0xe9,0x04,0xc2,0x03,0x01,0x77,0x39,0x05]
+
+v_mad_i32_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,1,0,0] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1
+// GFX12: [0xff,0x90,0x5a,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00]
+
+v_mad_u16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x78,0x41,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05]
+
+v_mad_u16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x08,0x41,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05]
+
+v_mad_u16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x10,0x41,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05]
+
+v_mad_u16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x20,0x41,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05]
+
+v_mad_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1
+// GFX12: [0xff,0xc0,0x41,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00]
+
+v_mad_u32_u16_e64_dpp v5, v1, v2, 0.5 op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x08,0x59,0xd6,0xe9,0x04,0xc2,0x03,0x01,0x77,0x39,0x05]
+
+v_mad_u32_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,1,0,0] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1
+// GFX12: [0xff,0x90,0x59,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00]
+
+v_max3_num_f16_e64_dpp v5, -v1, v2, |exec_lo| op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x7c,0x2c,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05]
+
+v_max3_num_f16_e64_dpp v5, -|v1|, -|v2|, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x0b,0x2c,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05]
+
+v_max3_num_f16_e64_dpp v5, -|v1|, v2, -|-1| op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x15,0x2c,0xd6,0xe9,0x04,0x06,0xa3,0x01,0x77,0x39,0x05]
+
+v_max3_num_f16_e64_dpp v5, v1, -|v2|, -|0.5| op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x26,0x2c,0xd6,0xe9,0x04,0xc2,0xc3,0x01,0x77,0x39,0x05]
+
+v_max3_num_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1
+// GFX12: [0xff,0xc7,0x2c,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00]
+
+v_max3_i16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x78,0x4d,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05]
+
+v_max3_i16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x08,0x4d,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05]
+
+v_max3_i16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x10,0x4d,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05]
+
+v_max3_i16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x20,0x4d,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05]
+
+v_max3_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1
+// GFX12: [0xff,0x40,0x4d,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00]
+
+v_max3_u16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x78,0x4e,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05]
+
+v_max3_u16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x08,0x4e,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05]
+
+v_max3_u16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x10,0x4e,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05]
+
+v_max3_u16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x20,0x4e,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05]
+
+v_max3_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1
+// GFX12: [0xff,0x40,0x4e,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00]
+
+v_med3_num_f16_e64_dpp v5, -v1, v2, |exec_lo| op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x7c,0x32,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05]
+
+v_med3_num_f16_e64_dpp v5, -|v1|, -|v2|, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x0b,0x32,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05]
+
+v_med3_num_f16_e64_dpp v5, -|v1|, v2, -|-1| op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x15,0x32,0xd6,0xe9,0x04,0x06,0xa3,0x01,0x77,0x39,0x05]
+
+v_med3_num_f16_e64_dpp v5, v1, -|v2|, -|0.5| op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x26,0x32,0xd6,0xe9,0x04,0xc2,0xc3,0x01,0x77,0x39,0x05]
+
+v_med3_num_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1
+// GFX12: [0xff,0xc7,0x32,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00]
+
+v_med3_i16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x78,0x50,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05]
+
+v_med3_i16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x08,0x50,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05]
+
+v_med3_i16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x10,0x50,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05]
+
+v_med3_i16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x20,0x50,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05]
+
+v_med3_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1
+// GFX12: [0xff,0x40,0x50,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00]
+
+v_med3_u16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x78,0x51,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05]
+
+v_med3_u16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x08,0x51,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05]
+
+v_med3_u16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x10,0x51,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05]
+
+v_med3_u16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x20,0x51,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05]
+
+v_med3_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1
+// GFX12: [0xff,0x40,0x51,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00]
+
+v_min3_num_f16_e64_dpp v5, -v1, v2, |exec_lo| op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x7c,0x2b,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05]
+
+v_min3_num_f16_e64_dpp v5, -|v1|, -|v2|, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x0b,0x2b,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05]
+
+v_min3_num_f16_e64_dpp v5, -|v1|, v2, -|-1| op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x15,0x2b,0xd6,0xe9,0x04,0x06,0xa3,0x01,0x77,0x39,0x05]
+
+v_min3_num_f16_e64_dpp v5, v1, -|v2|, -|0.5| op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x26,0x2b,0xd6,0xe9,0x04,0xc2,0xc3,0x01,0x77,0x39,0x05]
+
+v_min3_num_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1
+// GFX12: [0xff,0xc7,0x2b,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00]
+
+v_min3_i16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x78,0x4a,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05]
+
+v_min3_i16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x08,0x4a,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05]
+
+v_min3_i16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x10,0x4a,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05]
+
+v_min3_i16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x20,0x4a,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05]
+
+v_min3_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1
+// GFX12: [0xff,0x40,0x4a,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00]
+
+v_min3_u16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x78,0x4b,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05]
+
+v_min3_u16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x08,0x4b,0xd6,0xe9,0x04,0xfa,0x01,0x01,0x77,0x39,0x05]
+
+v_min3_u16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x10,0x4b,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05]
+
+v_min3_u16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x20,0x4b,0xd6,0xe9,0x04,0x06,0x03,0x01,0x77,0x39,0x05]
+
+v_min3_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1
+// GFX12: [0xff,0x40,0x4b,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00]
+
+v_pack_b32_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x0a,0x11,0xd7,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+
+v_pack_b32_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] dpp8:[0,0,0,0,0,0,0,0] fi:1
+// GFX12: [0xff,0x13,0x11,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+
+v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x58,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x08,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x10,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_sub_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1
+// GFX12: [0xff,0xc0,0x0e,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+
+v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x58,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x08,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x10,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_sub_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1
+// GFX12: [0xff,0xc0,0x04,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+
+v_dot2_f16_f16_e64_dpp v0, v1, v2, v3 dpp8:[0,1,2,3,4,4,4,4]
+// GFX12: [0x00,0x00,0x66,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0x46,0x92]
+
+v_dot2_f16_f16_e64_dpp v0, v1, v2, v3 op_sel:[1,1,0,0] dpp8:[0,1,2,3,4,4,4,4]
+// GFX12-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid op_sel operand
+
+v_dot2_f16_f16_e64_dpp v0, s1, v2, v3 dpp8:[0,1,2,3,4,4,4,4]
+// GFX12-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_dot2_f16_f16_e64_dpp v0, v1, s2, v3 dpp8:[0,1,2,3,4,4,4,4]
+// GFX12: [0x00,0x00,0x66,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x88,0x46,0x92]
+
+v_dot2_f16_f16_e64_dpp v0, v1, v2, v3 op_sel:[0,0,1,1] dpp8:[0,1,2,3,4,4,4,4]
+// GFX12: [0x00,0x60,0x66,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0x46,0x92]
+
+v_dot2_f16_f16_e64_dpp v0, |v1|, -v2, -|s3| op_sel:[0,0,1,1] dpp8:[0,1,2,3,4,4,4,4]
+// GFX12: [0x00,0x65,0x66,0xd6,0xe9,0x04,0x0e,0xc0,0x01,0x88,0x46,0x92]
+
+v_dot2_f16_f16_e64_dpp v5, v1, v2, 0.5 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x66,0xd6,0xe9,0x04,0xc2,0x03,0x01,0x77,0x39,0x05]
+
+v_dot2_bf16_bf16_e64_dpp v0, v1, v2, v3 dpp8:[0,1,2,3,4,4,4,4]
+// GFX12: [0x00,0x00,0x67,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0x46,0x92]
+
+v_dot2_bf16_bf16_e64_dpp v0, v1, v2, v3 op_sel:[1,1,0,0] dpp8:[0,1,2,3,4,4,4,4]
+// GFX12-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid op_sel operand
+
+v_dot2_bf16_bf16_e64_dpp v0, s1, v2, v3 dpp8:[0,1,2,3,4,4,4,4]
+// GFX12-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_dot2_bf16_bf16_e64_dpp v0, v1, s2, v3 dpp8:[0,1,2,3,4,4,4,4]
+// GFX12: [0x00,0x00,0x67,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x88,0x46,0x92]
+
+v_dot2_bf16_bf16_e64_dpp v0, v1, v2, v3 op_sel:[0,0,1,1] dpp8:[0,1,2,3,4,4,4,4]
+// GFX12: [0x00,0x60,0x67,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0x46,0x92]
+
+v_dot2_bf16_bf16_e64_dpp v0, |v1|, -v2, -|s3| op_sel:[0,0,1,1] dpp8:[0,1,2,3,4,4,4,4]
+// GFX12: [0x00,0x65,0x67,0xd6,0xe9,0x04,0x0e,0xc0,0x01,0x88,0x46,0x92]
+
+v_dot2_bf16_bf16_e64_dpp v5, v1, v2, 0 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x67,0xd6,0xe9,0x04,0x02,0x02,0x01,0x77,0x39,0x05]
+
+v_minimum_f32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x65,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_minimum_f32 v5, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x01,0x65,0xd7,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+
+v_minimum_f32 v5, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: [0x05,0x02,0x65,0xd7,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+
+v_minimum_f32 v255, -|v255|, -|v255| dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: [0xff,0x03,0x65,0xd7,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+
+v_maximum_f32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x66,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_maximum_f32 v5, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x01,0x66,0xd7,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+
+v_maximum_f32 v5, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: [0x05,0x02,0x66,0xd7,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+
+v_maximum_f32 v255, -|v255|, -|v255| dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: [0xff,0x03,0x66,0xd7,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+
+v_minimum_f16 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x67,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_minimum_f16 v5, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x01,0x67,0xd7,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+
+v_minimum_f16 v5, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: [0x05,0x02,0x67,0xd7,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+
+v_minimum_f16 v255, -|v255|, -|v255| dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: [0xff,0x03,0x67,0xd7,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+
+v_maximum_f16 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x68,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_maximum_f16 v5, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x01,0x68,0xd7,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+
+v_maximum_f16 v5, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: [0x05,0x02,0x68,0xd7,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+
+v_maximum_f16 v255, -|v255|, -|v255| dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: [0xff,0x03,0x68,0xd7,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+
+v_minimum3_f32 v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x2d,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05]
+
+v_minimum3_f32 v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x2d,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05]
+
+v_minimum3_f32 v5, v1, 2.0, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x2d,0xd6,0xe9,0xe8,0x0d,0x04,0x01,0x77,0x39,0x05]
+
+v_minimum3_f32 v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x2d,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05]
+
+v_minimum3_f32 v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x2d,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05]
+
+v_minimum3_f32 v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x2d,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05]
+
+v_minimum3_f32 v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x2d,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05]
+
+v_minimum3_f32 v5, |v1|, v2, -ttmp15 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x01,0x2d,0xd6,0xe9,0x04,0xee,0x81,0x01,0x77,0x39,0x05]
+
+v_minimum3_f32 v5, v1, -|v2|, exec_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x02,0x2d,0xd6,0xe9,0x04,0xfe,0x41,0x01,0x77,0x39,0x05]
+
+v_minimum3_f32 v5, -v1, v2, |exec_lo| dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x04,0x2d,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05]
+
+v_minimum3_f32 v5, -|v1|, -|v2|, null dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x03,0x2d,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05]
+
+v_minimum3_f32 v5, -|v1|, v2, -|-1| mul:2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x05,0x2d,0xd6,0xe9,0x04,0x06,0xab,0x01,0x77,0x39,0x05]
+
+v_minimum3_f32 v5, v1, -|v2|, -|0.5| mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: [0x05,0x06,0x2d,0xd6,0xea,0x04,0xc2,0xd3,0x01,0x77,0x39,0x05]
+
+v_minimum3_f32 v255, -|v255|, -|v255|, -|src_scc| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: [0xff,0x87,0x2d,0xd6,0xe9,0xfe,0xf7,0xfb,0xff,0x00,0x00,0x00]
+
+v_maximum3_f32 v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x2e,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05]
+
+v_maximum3_f32 v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x2e,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05]
+
+v_maximum3_f32 v5, v1, 2.0, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x2e,0xd6,0xe9,0xe8,0x0d,0x04,0x01,0x77,0x39,0x05]
+
+v_maximum3_f32 v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x2e,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05]
+
+v_maximum3_f32 v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x2e,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05]
+
+v_maximum3_f32 v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x2e,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05]
+
+v_maximum3_f32 v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x2e,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05]
+
+v_maximum3_f32 v5, |v1|, v2, -ttmp15 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x01,0x2e,0xd6,0xe9,0x04,0xee,0x81,0x01,0x77,0x39,0x05]
+
+v_maximum3_f32 v5, v1, -|v2|, exec_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x02,0x2e,0xd6,0xe9,0x04,0xfe,0x41,0x01,0x77,0x39,0x05]
+
+v_maximum3_f32 v5, -v1, v2, |exec_lo| dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x04,0x2e,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05]
+
+v_maximum3_f32 v5, -|v1|, -|v2|, null dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x03,0x2e,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05]
+
+v_maximum3_f32 v5, -|v1|, v2, -|-1| mul:2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x05,0x2e,0xd6,0xe9,0x04,0x06,0xab,0x01,0x77,0x39,0x05]
+
+v_maximum3_f32 v5, v1, -|v2|, -|0.5| mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: [0x05,0x06,0x2e,0xd6,0xea,0x04,0xc2,0xd3,0x01,0x77,0x39,0x05]
+
+v_maximum3_f32 v255, -|v255|, -|v255|, -|src_scc| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: [0xff,0x87,0x2e,0xd6,0xe9,0xfe,0xf7,0xfb,0xff,0x00,0x00,0x00]
+
+v_minimum3_f16 v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x2f,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05]
+
+v_minimum3_f16 v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x2f,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05]
+
+v_minimum3_f16 v5, v1, 2.0, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x2f,0xd6,0xe9,0xe8,0x0d,0x04,0x01,0x77,0x39,0x05]
+
+v_minimum3_f16 v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x2f,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05]
+
+v_minimum3_f16 v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x2f,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05]
+
+v_minimum3_f16 v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x2f,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05]
+
+v_minimum3_f16 v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x2f,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05]
+
+v_minimum3_f16 v5, |v1|, v2, -ttmp15 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x01,0x2f,0xd6,0xe9,0x04,0xee,0x81,0x01,0x77,0x39,0x05]
+
+v_minimum3_f16 v5, v1, -|v2|, exec_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x02,0x2f,0xd6,0xe9,0x04,0xfe,0x41,0x01,0x77,0x39,0x05]
+
+v_minimum3_f16 v5, -v1, v2, |exec_lo| dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x04,0x2f,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05]
+
+v_minimum3_f16 v5, -|v1|, -|v2|, null dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x03,0x2f,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05]
+
+v_minimum3_f16 v5, -|v1|, v2, -|-1| dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x05,0x2f,0xd6,0xe9,0x04,0x06,0xa3,0x01,0x77,0x39,0x05]
+
+v_minimum3_f16 v5, v1, -|v2|, -|0.5| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: [0x05,0x06,0x2f,0xd6,0xea,0x04,0xc2,0xc3,0x01,0x77,0x39,0x05]
+
+v_minimum3_f16 v255, -|v255|, -|v255|, -|src_scc| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: [0xff,0x87,0x2f,0xd6,0xe9,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00]
+
+v_maximum3_f16 v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x30,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05]
+
+v_maximum3_f16 v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x30,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05]
+
+v_maximum3_f16 v5, v1, 2.0, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x30,0xd6,0xe9,0xe8,0x0d,0x04,0x01,0x77,0x39,0x05]
+
+v_maximum3_f16 v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x30,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05]
+
+v_maximum3_f16 v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x30,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05]
+
+v_maximum3_f16 v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x30,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05]
+
+v_maximum3_f16 v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x30,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05]
+
+v_maximum3_f16 v5, |v1|, v2, -ttmp15 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x01,0x30,0xd6,0xe9,0x04,0xee,0x81,0x01,0x77,0x39,0x05]
+
+v_maximum3_f16 v5, v1, -|v2|, exec_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x02,0x30,0xd6,0xe9,0x04,0xfe,0x41,0x01,0x77,0x39,0x05]
+
+v_maximum3_f16 v5, -v1, v2, |exec_lo| dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x04,0x30,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05]
+
+v_maximum3_f16 v5, -|v1|, -|v2|, null dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x03,0x30,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05]
+
+v_maximum3_f16 v5, -|v1|, v2, -|-1| dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x05,0x30,0xd6,0xe9,0x04,0x06,0xa3,0x01,0x77,0x39,0x05]
+
+v_maximum3_f16 v5, v1, -|v2|, -|0.5| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: [0x05,0x06,0x30,0xd6,0xea,0x04,0xc2,0xc3,0x01,0x77,0x39,0x05]
+
+v_maximum3_f16 v255, -|v255|, -|v255|, -|src_scc| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: [0xff,0x87,0x30,0xd6,0xe9,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00]
+
+v_maximumminimum_f32 v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x6d,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05]
+
+v_maximumminimum_f32 v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x6d,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05]
+
+v_maximumminimum_f32 v5, v1, 2.0, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x6d,0xd6,0xe9,0xe8,0x0d,0x04,0x01,0x77,0x39,0x05]
+
+v_maximumminimum_f32 v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x6d,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05]
+
+v_maximumminimum_f32 v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x6d,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05]
+
+v_maximumminimum_f32 v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x6d,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05]
+
+v_maximumminimum_f32 v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x6d,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05]
+
+v_maximumminimum_f32 v5, |v1|, v2, -ttmp15 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x01,0x6d,0xd6,0xe9,0x04,0xee,0x81,0x01,0x77,0x39,0x05]
+
+v_maximumminimum_f32 v5, v1, -|v2|, exec_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x02,0x6d,0xd6,0xe9,0x04,0xfe,0x41,0x01,0x77,0x39,0x05]
+
+v_maximumminimum_f32 v5, -v1, v2, |exec_lo| dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x04,0x6d,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05]
+
+v_maximumminimum_f32 v5, -|v1|, -|v2|, null dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x03,0x6d,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05]
+
+v_maximumminimum_f32 v5, -|v1|, v2, -|-1| mul:2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x05,0x6d,0xd6,0xe9,0x04,0x06,0xab,0x01,0x77,0x39,0x05]
+
+v_maximumminimum_f32 v5, v1, -|v2|, -|0.5| mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: [0x05,0x06,0x6d,0xd6,0xea,0x04,0xc2,0xd3,0x01,0x77,0x39,0x05]
+
+v_maximumminimum_f32 v255, -|v255|, -|v255|, -|src_scc| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: [0xff,0x87,0x6d,0xd6,0xe9,0xfe,0xf7,0xfb,0xff,0x00,0x00,0x00]
+
+v_minimummaximum_f32 v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x6c,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05]
+
+v_minimummaximum_f32 v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x6c,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05]
+
+v_minimummaximum_f32 v5, v1, 2.0, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x6c,0xd6,0xe9,0xe8,0x0d,0x04,0x01,0x77,0x39,0x05]
+
+v_minimummaximum_f32 v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x6c,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05]
+
+v_minimummaximum_f32 v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x6c,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05]
+
+v_minimummaximum_f32 v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x6c,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05]
+
+v_minimummaximum_f32 v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x6c,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05]
+
+v_minimummaximum_f32 v5, |v1|, v2, -ttmp15 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x01,0x6c,0xd6,0xe9,0x04,0xee,0x81,0x01,0x77,0x39,0x05]
+
+v_minimummaximum_f32 v5, v1, -|v2|, exec_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x02,0x6c,0xd6,0xe9,0x04,0xfe,0x41,0x01,0x77,0x39,0x05]
+
+v_minimummaximum_f32 v5, -v1, v2, |exec_lo| dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x04,0x6c,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05]
+
+v_minimummaximum_f32 v5, -|v1|, -|v2|, null dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x03,0x6c,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05]
+
+v_minimummaximum_f32 v5, -|v1|, v2, -|-1| mul:2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x05,0x6c,0xd6,0xe9,0x04,0x06,0xab,0x01,0x77,0x39,0x05]
+
+v_minimummaximum_f32 v5, v1, -|v2|, -|0.5| mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: [0x05,0x06,0x6c,0xd6,0xea,0x04,0xc2,0xd3,0x01,0x77,0x39,0x05]
+
+v_minimummaximum_f32 v255, -|v255|, -|v255|, -|src_scc| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: [0xff,0x87,0x6c,0xd6,0xe9,0xfe,0xf7,0xfb,0xff,0x00,0x00,0x00]
+
+v_maximumminimum_f16 v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x6f,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05]
+
+v_maximumminimum_f16 v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x6f,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05]
+
+v_maximumminimum_f16 v5, v1, 2.0, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x6f,0xd6,0xe9,0xe8,0x0d,0x04,0x01,0x77,0x39,0x05]
+
+v_maximumminimum_f16 v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x6f,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05]
+
+v_maximumminimum_f16 v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x6f,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05]
+
+v_maximumminimum_f16 v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x6f,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05]
+
+v_maximumminimum_f16 v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x6f,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05]
+
+v_maximumminimum_f16 v5, |v1|, v2, -ttmp15 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x01,0x6f,0xd6,0xe9,0x04,0xee,0x81,0x01,0x77,0x39,0x05]
+
+v_maximumminimum_f16 v5, v1, -|v2|, exec_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x02,0x6f,0xd6,0xe9,0x04,0xfe,0x41,0x01,0x77,0x39,0x05]
+
+v_maximumminimum_f16 v5, -v1, v2, |exec_lo| dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x04,0x6f,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05]
+
+v_maximumminimum_f16 v5, -|v1|, -|v2|, null dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x03,0x6f,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05]
+
+v_maximumminimum_f16 v5, -|v1|, v2, -|-1| dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x05,0x6f,0xd6,0xe9,0x04,0x06,0xa3,0x01,0x77,0x39,0x05]
+
+v_maximumminimum_f16 v5, v1, -|v2|, -|0.5| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: [0x05,0x06,0x6f,0xd6,0xea,0x04,0xc2,0xc3,0x01,0x77,0x39,0x05]
+
+v_maximumminimum_f16 v255, -|v255|, -|v255|, -|src_scc| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: [0xff,0x87,0x6f,0xd6,0xe9,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00]
+
+v_minimummaximum_f16 v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x6e,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05]
+
+v_minimummaximum_f16 v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x6e,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05]
+
+v_minimummaximum_f16 v5, v1, 2.0, v3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x6e,0xd6,0xe9,0xe8,0x0d,0x04,0x01,0x77,0x39,0x05]
+
+v_minimummaximum_f16 v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x6e,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05]
+
+v_minimummaximum_f16 v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x6e,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05]
+
+v_minimummaximum_f16 v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x6e,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05]
+
+v_minimummaximum_f16 v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x00,0x6e,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05]
+
+v_minimummaximum_f16 v5, |v1|, v2, -ttmp15 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x01,0x6e,0xd6,0xe9,0x04,0xee,0x81,0x01,0x77,0x39,0x05]
+
+v_minimummaximum_f16 v5, v1, -|v2|, exec_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x02,0x6e,0xd6,0xe9,0x04,0xfe,0x41,0x01,0x77,0x39,0x05]
+
+v_minimummaximum_f16 v5, -v1, v2, |exec_lo| dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x04,0x6e,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05]
+
+v_minimummaximum_f16 v5, -|v1|, -|v2|, null dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x03,0x6e,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05]
+
+v_minimummaximum_f16 v5, -|v1|, v2, -|-1| dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: [0x05,0x05,0x6e,0xd6,0xe9,0x04,0x06,0xa3,0x01,0x77,0x39,0x05]
+
+v_minimummaximum_f16 v5, v1, -|v2|, -|0.5| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: [0x05,0x06,0x6e,0xd6,0xea,0x04,0xc2,0xc3,0x01,0x77,0x39,0x05]
+
+v_minimummaximum_f16 v255, -|v255|, -|v255|, -|src_scc| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: [0xff,0x87,0x6e,0xd6,0xe9,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00]
diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp8.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp8.s
index 3003d72b67968..2d912a4d1ad1f 100644
--- a/llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp8.s
+++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp8.s
@@ -1,7 +1,7 @@
-// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32 -show-encoding %s | FileCheck --check-prefixes=GFX12,W32 %s
-// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64 -show-encoding %s | FileCheck --check-prefixes=GFX12,W64 %s
-// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32 %s 2>&1 | FileCheck --check-prefixes=GFX12-ERR,W32-ERR --implicit-check-not=error: %s
-// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64 %s 2>&1 | FileCheck --check-prefixes=GFX12-ERR,W64-ERR --implicit-check-not=error: %s
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,+real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX12,W32 %s
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,+real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX12,W64 %s
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,+real-true16 %s 2>&1 | FileCheck --check-prefixes=GFX12-ERR,W32-ERR --implicit-check-not=error: %s
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,+real-true16 %s 2>&1 | FileCheck --check-prefixes=GFX12-ERR,W64-ERR --implicit-check-not=error: %s
 
 v_add3_u32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0]
 // GFX12: [0x05,0x00,0x55,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05]
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3.txt
index 0c1d538a22750..c58b696e2d2e7 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3.txt
@@ -1,5 +1,8 @@
-# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX11,W32 %s
-# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX11,W64 %s
+; NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --version 5
+# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX11,W32,W32-REAL16 %s
+# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX11,W32,W32-FAKE16 %s
+# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,+real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX11,W64,W64-REAL16 %s
+# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,-real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX11,W64,W64-FAKE16 %s
 
 # GFX11: v_add3_u32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x55,0xd6,0x01,0x05,0x0e,0x00]
 0x05,0x00,0x55,0xd6,0x01,0x05,0x0e,0x00
@@ -411,49 +414,94 @@
 # GFX11: v_alignbyte_b32 v255, 0xaf123456, vcc_hi, null ; encoding: [0xff,0x00,0x17,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf]
 0xff,0x00,0x17,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf
 
-# GFX11: v_and_b16 v5, v1, v2 ; encoding: [0x05,0x00,0x62,0xd7,0x01,0x05,0x02,0x00]
+# W32-REAL16: v_and_b16 v5.l, v1.l, v2.l ; encoding: [0x05,0x00,0x62,0xd7,0x01,0x05,0x02,0x00]
+# W32-FAKE16: v_and_b16 v5, v1, v2 ; encoding: [0x05,0x00,0x62,0xd7,0x01,0x05,0x02,0x00]
+# W64-REAL16: v_and_b16 v5.l, v1.l, v2.l ; encoding: [0x05,0x00,0x62,0xd7,0x01,0x05,0x02,0x00]
+# W64-FAKE16: v_and_b16 v5, v1, v2 ; encoding: [0x05,0x00,0x62,0xd7,0x01,0x05,0x02,0x00]
 0x05,0x00,0x62,0xd7,0x01,0x05,0x02,0x00
 
-# GFX11: v_and_b16 v5, v255, v255 ; encoding: [0x05,0x00,0x62,0xd7,0xff,0xff,0x03,0x00]
+# W32-REAL16: v_and_b16 v5.l, v255.l, v255.l ; encoding: [0x05,0x00,0x62,0xd7,0xff,0xff,0x03,0x00]
+# W32-FAKE16: v_and_b16 v5, v255, v255 ; encoding: [0x05,0x00,0x62,0xd7,0xff,0xff,0x03,0x00]
+# W64-REAL16: v_and_b16 v5.l, v255.l, v255.l ; encoding: [0x05,0x00,0x62,0xd7,0xff,0xff,0x03,0x00]
+# W64-FAKE16: v_and_b16 v5, v255, v255 ; encoding: [0x05,0x00,0x62,0xd7,0xff,0xff,0x03,0x00]
 0x05,0x00,0x62,0xd7,0xff,0xff,0x03,0x00
 
-# GFX11: v_and_b16 v5, s1, s2 ; encoding: [0x05,0x00,0x62,0xd7,0x01,0x04,0x00,0x00]
+# W32-REAL16: v_and_b16 v5.l, s1, s2 ; encoding: [0x05,0x00,0x62,0xd7,0x01,0x04,0x00,0x00]
+# W32-FAKE16: v_and_b16 v5, s1, s2 ; encoding: [0x05,0x00,0x62,0xd7,0x01,0x04,0x00,0x00]
+# W64-REAL16: v_and_b16 v5.l, s1, s2 ; encoding: [0x05,0x00,0x62,0xd7,0x01,0x04,0x00,0x00]
+# W64-FAKE16: v_and_b16 v5, s1, s2 ; encoding: [0x05,0x00,0x62,0xd7,0x01,0x04,0x00,0x00]
 0x05,0x00,0x62,0xd7,0x01,0x04,0x00,0x00
 
-# GFX11: v_and_b16 v5, s105, s105 ; encoding: [0x05,0x00,0x62,0xd7,0x69,0xd2,0x00,0x00]
+# W32-REAL16: v_and_b16 v5.l, s105, s105 ; encoding: [0x05,0x00,0x62,0xd7,0x69,0xd2,0x00,0x00]
+# W32-FAKE16: v_and_b16 v5, s105, s105 ; encoding: [0x05,0x00,0x62,0xd7,0x69,0xd2,0x00,0x00]
+# W64-REAL16: v_and_b16 v5.l, s105, s105 ; encoding: [0x05,0x00,0x62,0xd7,0x69,0xd2,0x00,0x00]
+# W64-FAKE16: v_and_b16 v5, s105, s105 ; encoding: [0x05,0x00,0x62,0xd7,0x69,0xd2,0x00,0x00]
 0x05,0x00,0x62,0xd7,0x69,0xd2,0x00,0x00
 
-# GFX11: v_and_b16 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x62,0xd7,0x6a,0xf6,0x00,0x00]
+# W32-REAL16: v_and_b16 v5.l, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x62,0xd7,0x6a,0xf6,0x00,0x00]
+# W32-FAKE16: v_and_b16 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x62,0xd7,0x6a,0xf6,0x00,0x00]
+# W64-REAL16: v_and_b16 v5.l, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x62,0xd7,0x6a,0xf6,0x00,0x00]
+# W64-FAKE16: v_and_b16 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x62,0xd7,0x6a,0xf6,0x00,0x00]
 0x05,0x00,0x62,0xd7,0x6a,0xf6,0x00,0x00
 
-# GFX11: v_and_b16 v5, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x62,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+# W32-REAL16: v_and_b16 v5.l, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x62,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+# W32-FAKE16: v_and_b16 v5, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x62,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+# W64-REAL16: v_and_b16 v5.l, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x62,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+# W64-FAKE16: v_and_b16 v5, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x62,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
0x05,0x00,0x62,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00
 
-# GFX11: v_and_b16 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x62,0xd7,0x7b,0xfa,0x01,0x00]
+# W32-REAL16: v_and_b16 v5.l, ttmp15, src_scc ; encoding: [0x05,0x00,0x62,0xd7,0x7b,0xfa,0x01,0x00]
+# W32-FAKE16: v_and_b16 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x62,0xd7,0x7b,0xfa,0x01,0x00]
+# W64-REAL16: v_and_b16 v5.l, ttmp15, src_scc ; encoding: [0x05,0x00,0x62,0xd7,0x7b,0xfa,0x01,0x00]
+# W64-FAKE16: v_and_b16 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x62,0xd7,0x7b,0xfa,0x01,0x00]
 0x05,0x00,0x62,0xd7,0x7b,0xfa,0x01,0x00
 
-# GFX11: v_and_b16 v5, m0, 0x3800
+# W32-REAL16: v_and_b16 v5.l, m0, 0x3800 ; encoding: [0x05,0x00,0x62,0xd7,0x7d,0xfe,0x01,0x00,0x00,0x38,0x00,0x00]
+# W32-FAKE16: v_and_b16 v5, m0, 0x3800 ; encoding: [0x05,0x00,0x62,0xd7,0x7d,0xfe,0x01,0x00,0x00,0x38,0x00,0x00]
+# W64-REAL16: v_and_b16 v5.l, m0, 0x3800 ; encoding: [0x05,0x00,0x62,0xd7,0x7d,0xfe,0x01,0x00,0x00,0x38,0x00,0x00]
+# W64-FAKE16: v_and_b16 v5, m0, 0x3800 ; encoding: [0x05,0x00,0x62,0xd7,0x7d,0xfe,0x01,0x00,0x00,0x38,0x00,0x00]
 0x05,0x00,0x62,0xd7,0x7d,0xe0,0x01,0x00
 
-# GFX11: v_and_b16 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x62,0xd7,0x7e,0x82,0x01,0x00]
+# W32-REAL16: v_and_b16 v5.l, exec_lo, -1 ; encoding: [0x05,0x00,0x62,0xd7,0x7e,0x82,0x01,0x00]
+# W32-FAKE16: v_and_b16 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x62,0xd7,0x7e,0x82,0x01,0x00]
+# W64-REAL16: v_and_b16 v5.l, exec_lo, -1 ; encoding: [0x05,0x00,0x62,0xd7,0x7e,0x82,0x01,0x00]
+# W64-FAKE16: v_and_b16 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x62,0xd7,0x7e,0x82,0x01,0x00]
 0x05,0x00,0x62,0xd7,0x7e,0x82,0x01,0x00
 
-# GFX11: v_and_b16 v5, exec_hi, null ; encoding: [0x05,0x00,0x62,0xd7,0x7f,0xf8,0x00,0x00]
+# W32-REAL16: v_and_b16 v5.l, exec_hi, null ; encoding: [0x05,0x00,0x62,0xd7,0x7f,0xf8,0x00,0x00]
+# W32-FAKE16: v_and_b16 v5, exec_hi, null ; encoding: [0x05,0x00,0x62,0xd7,0x7f,0xf8,0x00,0x00]
+# W64-REAL16: v_and_b16 v5.l, exec_hi, null ; encoding: [0x05,0x00,0x62,0xd7,0x7f,0xf8,0x00,0x00]
+# W64-FAKE16: v_and_b16 v5, exec_hi, null ; encoding: [0x05,0x00,0x62,0xd7,0x7f,0xf8,0x00,0x00]
 0x05,0x00,0x62,0xd7,0x7f,0xf8,0x00,0x00
 
-# GFX11: v_and_b16 v5, null, exec_lo ; encoding: [0x05,0x00,0x62,0xd7,0x7c,0xfc,0x00,0x00]
+# W32-REAL16: v_and_b16 v5.l, null, exec_lo ; encoding: [0x05,0x00,0x62,0xd7,0x7c,0xfc,0x00,0x00]
+# W32-FAKE16: v_and_b16 v5, null, exec_lo ; encoding: [0x05,0x00,0x62,0xd7,0x7c,0xfc,0x00,0x00]
+# W64-REAL16: v_and_b16 v5.l, null, exec_lo ; encoding: [0x05,0x00,0x62,0xd7,0x7c,0xfc,0x00,0x00]
+# W64-FAKE16: v_and_b16 v5, null, exec_lo ; encoding: [0x05,0x00,0x62,0xd7,0x7c,0xfc,0x00,0x00]
 0x05,0x00,0x62,0xd7,0x7c,0xfc,0x00,0x00
 
-# GFX11: v_and_b16 v5, -1, exec_hi ; encoding: [0x05,0x00,0x62,0xd7,0xc1,0xfe,0x00,0x00]
+# W32-REAL16: v_and_b16 v5.l, -1, exec_hi ; encoding: [0x05,0x00,0x62,0xd7,0xc1,0xfe,0x00,0x00]
+# W32-FAKE16: v_and_b16 v5, -1, exec_hi ; encoding: [0x05,0x00,0x62,0xd7,0xc1,0xfe,0x00,0x00]
+# W64-REAL16: v_and_b16 v5.l, -1, exec_hi ; encoding: [0x05,0x00,0x62,0xd7,0xc1,0xfe,0x00,0x00]
+# W64-FAKE16: v_and_b16 v5, -1, exec_hi ; encoding: [0x05,0x00,0x62,0xd7,0xc1,0xfe,0x00,0x00]
 0x05,0x00,0x62,0xd7,0xc1,0xfe,0x00,0x00
 
-# GFX11: v_and_b16 v5, 0x3800, m0
+# W32-REAL16: v_and_b16 v5.l, 0x3800, m0 ; encoding: [0x05,0x00,0x62,0xd7,0xff,0xfa,0x00,0x00,0x00,0x38,0x00,0x00]
+# W32-FAKE16: v_and_b16 v5, 0x3800, m0 ; encoding: [0x05,0x00,0x62,0xd7,0xff,0xfa,0x00,0x00,0x00,0x38,0x00,0x00]
+# W64-REAL16: v_and_b16 v5.l, 0x3800, m0 ; encoding: [0x05,0x00,0x62,0xd7,0xff,0xfa,0x00,0x00,0x00,0x38,0x00,0x00]
+# W64-FAKE16: v_and_b16 v5, 0x3800, m0 ; encoding: [0x05,0x00,0x62,0xd7,0xff,0xfa,0x00,0x00,0x00,0x38,0x00,0x00]
 0x05,0x00,0x62,0xd7,0xf0,0xfa,0x00,0x00
 
-# GFX11: v_and_b16 v5, src_scc, vcc_lo ; encoding: [0x05,0x00,0x62,0xd7,0xfd,0xd4,0x00,0x00]
+# W32-REAL16: v_and_b16 v5.l, src_scc, vcc_lo ; encoding: [0x05,0x00,0x62,0xd7,0xfd,0xd4,0x00,0x00]
+# W32-FAKE16: v_and_b16 v5, src_scc, vcc_lo ; encoding: [0x05,0x00,0x62,0xd7,0xfd,0xd4,0x00,0x00]
+# W64-REAL16: v_and_b16 v5.l, src_scc, vcc_lo ; encoding: [0x05,0x00,0x62,0xd7,0xfd,0xd4,0x00,0x00]
+# W64-FAKE16: v_and_b16 v5, src_scc, vcc_lo ; encoding: [0x05,0x00,0x62,0xd7,0xfd,0xd4,0x00,0x00]
 0x05,0x00,0x62,0xd7,0xfd,0xd4,0x00,0x00
 
-# GFX11: v_and_b16 v255, 0xfe0b, vcc_hi ; encoding: [0xff,0x00,0x62,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+# W32-REAL16: v_and_b16 v255.l, 0xfe0b, vcc_hi ; encoding: [0xff,0x00,0x62,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+# W32-FAKE16: v_and_b16 v255, 0xfe0b, vcc_hi ; encoding: [0xff,0x00,0x62,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+# W64-REAL16: v_and_b16 v255.l, 0xfe0b, vcc_hi ; encoding: [0xff,0x00,0x62,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+# W64-FAKE16: v_and_b16 v255, 0xfe0b, vcc_hi ; encoding: [0xff,0x00,0x62,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
 0xff,0x00,0x62,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00
 
 # GFX11: v_and_or_b32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x57,0xd6,0x01,0x05,0x0e,0x00]
 0x05,0x00,0x57,0xd6,0x01,0x05,0x0e,0x00
@@ -4738,49 +4786,94 @@
 # GFX11: v_or3_b32 v255, 0xaf123456, vcc_hi, null ; encoding: [0xff,0x00,0x58,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf]
 0xff,0x00,0x58,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf
 
-# GFX11: v_or_b16 v5, v1, v2 ; encoding: [0x05,0x00,0x63,0xd7,0x01,0x05,0x02,0x00]
+# W32-REAL16: v_or_b16 v5.l, v1.l, v2.l ; encoding: [0x05,0x00,0x63,0xd7,0x01,0x05,0x02,0x00]
+# W32-FAKE16: v_or_b16 v5, v1, v2 ; encoding: [0x05,0x00,0x63,0xd7,0x01,0x05,0x02,0x00]
+# W64-REAL16: v_or_b16 v5.l, v1.l, v2.l ; encoding: [0x05,0x00,0x63,0xd7,0x01,0x05,0x02,0x00]
+# W64-FAKE16: v_or_b16 v5, v1, v2 ; encoding: [0x05,0x00,0x63,0xd7,0x01,0x05,0x02,0x00]
 0x05,0x00,0x63,0xd7,0x01,0x05,0x02,0x00
 
-# GFX11: v_or_b16 v5, v255, v255 ; encoding: [0x05,0x00,0x63,0xd7,0xff,0xff,0x03,0x00]
+# W32-REAL16: v_or_b16 v5.l, v255.l, v255.l ; encoding: [0x05,0x00,0x63,0xd7,0xff,0xff,0x03,0x00]
+# W32-FAKE16: v_or_b16 v5, v255, v255 ; encoding: [0x05,0x00,0x63,0xd7,0xff,0xff,0x03,0x00]
+# W64-REAL16: v_or_b16 v5.l, v255.l, v255.l ; encoding: [0x05,0x00,0x63,0xd7,0xff,0xff,0x03,0x00]
+# W64-FAKE16: v_or_b16 v5, v255, v255 ; encoding: [0x05,0x00,0x63,0xd7,0xff,0xff,0x03,0x00]
 0x05,0x00,0x63,0xd7,0xff,0xff,0x03,0x00
 
-# GFX11: v_or_b16 v5, s1, s2 ; encoding: [0x05,0x00,0x63,0xd7,0x01,0x04,0x00,0x00]
+# W32-REAL16: v_or_b16 v5.l, s1, s2 ; encoding: [0x05,0x00,0x63,0xd7,0x01,0x04,0x00,0x00]
+# W32-FAKE16: v_or_b16 v5, s1, s2 ; encoding: [0x05,0x00,0x63,0xd7,0x01,0x04,0x00,0x00]
+# W64-REAL16: v_or_b16 v5.l, s1, s2 ; encoding: [0x05,0x00,0x63,0xd7,0x01,0x04,0x00,0x00]
+# W64-FAKE16: v_or_b16 v5, s1, s2 ; encoding: [0x05,0x00,0x63,0xd7,0x01,0x04,0x00,0x00]
 0x05,0x00,0x63,0xd7,0x01,0x04,0x00,0x00
 
-# GFX11: v_or_b16 v5, s105, s105 ; encoding: [0x05,0x00,0x63,0xd7,0x69,0xd2,0x00,0x00]
+# W32-REAL16: v_or_b16 v5.l, s105, s105 ; encoding: [0x05,0x00,0x63,0xd7,0x69,0xd2,0x00,0x00]
+# W32-FAKE16: v_or_b16 v5, s105, s105 ; encoding: [0x05,0x00,0x63,0xd7,0x69,0xd2,0x00,0x00]
+# W64-REAL16: v_or_b16 v5.l, s105, s105 ; encoding: [0x05,0x00,0x63,0xd7,0x69,0xd2,0x00,0x00]
+# W64-FAKE16: v_or_b16 v5, s105, s105 ; encoding: [0x05,0x00,0x63,0xd7,0x69,0xd2,0x00,0x00]
 0x05,0x00,0x63,0xd7,0x69,0xd2,0x00,0x00
 
-# GFX11: v_or_b16 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x63,0xd7,0x6a,0xf6,0x00,0x00]
+# W32-REAL16: v_or_b16 v5.l, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x63,0xd7,0x6a,0xf6,0x00,0x00]
+# W32-FAKE16: v_or_b16 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x63,0xd7,0x6a,0xf6,0x00,0x00]
+# W64-REAL16: v_or_b16 v5.l, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x63,0xd7,0x6a,0xf6,0x00,0x00]
+# W64-FAKE16: v_or_b16 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x63,0xd7,0x6a,0xf6,0x00,0x00]
 0x05,0x00,0x63,0xd7,0x6a,0xf6,0x00,0x00
 
-# GFX11: v_or_b16 v5, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x63,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+# W32-REAL16: v_or_b16 v5.l, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x63,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+# W32-FAKE16: v_or_b16 v5, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x63,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+# W64-REAL16: v_or_b16 v5.l, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x63,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+# W64-FAKE16: v_or_b16 v5, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x63,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
 0x05,0x00,0x63,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00
 
-# GFX11: v_or_b16 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x63,0xd7,0x7b,0xfa,0x01,0x00]
+# W32-REAL16: v_or_b16 v5.l, ttmp15, src_scc ; encoding: [0x05,0x00,0x63,0xd7,0x7b,0xfa,0x01,0x00]
+# W32-FAKE16: v_or_b16 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x63,0xd7,0x7b,0xfa,0x01,0x00]
+# W64-REAL16: v_or_b16 v5.l, ttmp15, src_scc ; encoding: [0x05,0x00,0x63,0xd7,0x7b,0xfa,0x01,0x00]
+# W64-FAKE16: v_or_b16 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x63,0xd7,0x7b,0xfa,0x01,0x00]
 0x05,0x00,0x63,0xd7,0x7b,0xfa,0x01,0x00
 
-# GFX11: v_or_b16 v5, m0, 0x3800
+# W32-REAL16: v_or_b16 v5.l, m0, 0x3800 ; encoding: [0x05,0x00,0x63,0xd7,0x7d,0xfe,0x01,0x00,0x00,0x38,0x00,0x00]
+# W32-FAKE16: v_or_b16 v5, m0, 0x3800 ; encoding: [0x05,0x00,0x63,0xd7,0x7d,0xfe,0x01,0x00,0x00,0x38,0x00,0x00]
+# W64-REAL16: v_or_b16 v5.l, m0, 0x3800 ; encoding: [0x05,0x00,0x63,0xd7,0x7d,0xfe,0x01,0x00,0x00,0x38,0x00,0x00]
+# W64-FAKE16: v_or_b16 v5, m0, 0x3800 ; encoding: [0x05,0x00,0x63,0xd7,0x7d,0xfe,0x01,0x00,0x00,0x38,0x00,0x00]
 0x05,0x00,0x63,0xd7,0x7d,0xe0,0x01,0x00
 
-# GFX11: v_or_b16 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x63,0xd7,0x7e,0x82,0x01,0x00]
+# W32-REAL16: v_or_b16 v5.l, exec_lo, -1 ; encoding: [0x05,0x00,0x63,0xd7,0x7e,0x82,0x01,0x00]
+# W32-FAKE16: v_or_b16 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x63,0xd7,0x7e,0x82,0x01,0x00]
+# W64-REAL16: v_or_b16 v5.l, exec_lo, -1 ; encoding: [0x05,0x00,0x63,0xd7,0x7e,0x82,0x01,0x00]
+# W64-FAKE16: v_or_b16 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x63,0xd7,0x7e,0x82,0x01,0x00]
 0x05,0x00,0x63,0xd7,0x7e,0x82,0x01,0x00
 
-# GFX11: v_or_b16 v5, exec_hi, null ; encoding: [0x05,0x00,0x63,0xd7,0x7f,0xf8,0x00,0x00]
+# W32-REAL16: v_or_b16 v5.l, exec_hi, null ; encoding: [0x05,0x00,0x63,0xd7,0x7f,0xf8,0x00,0x00]
+# W32-FAKE16: v_or_b16 v5, exec_hi, null ; encoding: [0x05,0x00,0x63,0xd7,0x7f,0xf8,0x00,0x00]
+# W64-REAL16: v_or_b16 v5.l, exec_hi, null ; encoding: [0x05,0x00,0x63,0xd7,0x7f,0xf8,0x00,0x00]
+# W64-FAKE16: v_or_b16 v5, exec_hi, null ; encoding: [0x05,0x00,0x63,0xd7,0x7f,0xf8,0x00,0x00]
 0x05,0x00,0x63,0xd7,0x7f,0xf8,0x00,0x00
 
-# GFX11: v_or_b16 v5, null, exec_lo ; encoding: [0x05,0x00,0x63,0xd7,0x7c,0xfc,0x00,0x00]
+# W32-REAL16: v_or_b16 v5.l, null, exec_lo ; encoding: [0x05,0x00,0x63,0xd7,0x7c,0xfc,0x00,0x00]
+# W32-FAKE16: v_or_b16 v5, null, exec_lo ; encoding: [0x05,0x00,0x63,0xd7,0x7c,0xfc,0x00,0x00]
+# W64-REAL16: v_or_b16 v5.l, null, exec_lo ; encoding: [0x05,0x00,0x63,0xd7,0x7c,0xfc,0x00,0x00]
+# W64-FAKE16: v_or_b16 v5, null, exec_lo ; encoding: [0x05,0x00,0x63,0xd7,0x7c,0xfc,0x00,0x00]
 0x05,0x00,0x63,0xd7,0x7c,0xfc,0x00,0x00
 
-# GFX11: v_or_b16 v5, -1, exec_hi ; encoding: [0x05,0x00,0x63,0xd7,0xc1,0xfe,0x00,0x00]
+# W32-REAL16: v_or_b16 v5.l, -1, exec_hi ; encoding: [0x05,0x00,0x63,0xd7,0xc1,0xfe,0x00,0x00]
+# W32-FAKE16: v_or_b16 v5, -1, exec_hi ; encoding: [0x05,0x00,0x63,0xd7,0xc1,0xfe,0x00,0x00]
+# W64-REAL16: v_or_b16 v5.l, -1, exec_hi ; encoding: [0x05,0x00,0x63,0xd7,0xc1,0xfe,0x00,0x00]
+# W64-FAKE16: v_or_b16 v5, -1, exec_hi ; encoding: [0x05,0x00,0x63,0xd7,0xc1,0xfe,0x00,0x00]
 0x05,0x00,0x63,0xd7,0xc1,0xfe,0x00,0x00
 
-# GFX11: v_or_b16 v5, 0x3800, m0
+# W32-REAL16: v_or_b16 v5.l, 0x3800, m0 ; encoding: [0x05,0x00,0x63,0xd7,0xff,0xfa,0x00,0x00,0x00,0x38,0x00,0x00]
+# W32-FAKE16: v_or_b16 v5, 0x3800, m0 ; encoding: [0x05,0x00,0x63,0xd7,0xff,0xfa,0x00,0x00,0x00,0x38,0x00,0x00]
+# W64-REAL16: v_or_b16 v5.l, 0x3800, m0 ; encoding: [0x05,0x00,0x63,0xd7,0xff,0xfa,0x00,0x00,0x00,0x38,0x00,0x00]
+# W64-FAKE16: v_or_b16 v5, 0x3800, m0 ; encoding: [0x05,0x00,0x63,0xd7,0xff,0xfa,0x00,0x00,0x00,0x38,0x00,0x00]
 0x05,0x00,0x63,0xd7,0xf0,0xfa,0x00,0x00
 
-# GFX11: v_or_b16 v5, src_scc, vcc_lo ; encoding: [0x05,0x00,0x63,0xd7,0xfd,0xd4,0x00,0x00]
+# W32-REAL16: v_or_b16 v5.l, src_scc, vcc_lo ; encoding: [0x05,0x00,0x63,0xd7,0xfd,0xd4,0x00,0x00]
+# W32-FAKE16: v_or_b16 v5, src_scc, vcc_lo ; encoding: [0x05,0x00,0x63,0xd7,0xfd,0xd4,0x00,0x00]
+# W64-REAL16: v_or_b16 v5.l, src_scc, vcc_lo ; encoding: [0x05,0x00,0x63,0xd7,0xfd,0xd4,0x00,0x00]
+# W64-FAKE16: v_or_b16 v5, src_scc, vcc_lo ; encoding: [0x05,0x00,0x63,0xd7,0xfd,0xd4,0x00,0x00]
 0x05,0x00,0x63,0xd7,0xfd,0xd4,0x00,0x00
 
-# GFX11: v_or_b16 v255, 0xfe0b, vcc_hi ; encoding: [0xff,0x00,0x63,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+# W32-REAL16: v_or_b16 v255.l, 0xfe0b, vcc_hi ; encoding: [0xff,0x00,0x63,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+# W32-FAKE16: v_or_b16 v255, 0xfe0b, vcc_hi ; encoding: [0xff,0x00,0x63,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+# W64-REAL16: v_or_b16 v255.l, 0xfe0b, vcc_hi ; encoding: [0xff,0x00,0x63,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+# W64-FAKE16: v_or_b16 v255, 0xfe0b, vcc_hi ; encoding: [0xff,0x00,0x63,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
 0xff,0x00,0x63,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00
 
 # GFX11: v_pack_b32_f16 v5, v1, v2 ; encoding: [0x05,0x00,0x11,0xd7,0x01,0x05,0x02,0x00]
 0x05,0x00,0x11,0xd7,0x01,0x05,0x02,0x00
@@ -5642,47 +5735,92 @@
 # GFX11: v_xor3_b32 v255, 0xaf123456, vcc_hi, null ; encoding: [0xff,0x00,0x40,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf]
 0xff,0x00,0x40,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf
 
-# GFX11: v_xor_b16 v5, v1, v2 ; encoding: [0x05,0x00,0x64,0xd7,0x01,0x05,0x02,0x00]
+# W32-REAL16: v_xor_b16 v5.l, v1.l, v2.l ; encoding: [0x05,0x00,0x64,0xd7,0x01,0x05,0x02,0x00]
+# W32-FAKE16: v_xor_b16 v5, v1, v2 ; encoding: [0x05,0x00,0x64,0xd7,0x01,0x05,0x02,0x00]
+# W64-REAL16: v_xor_b16 v5.l, v1.l, v2.l ; encoding: [0x05,0x00,0x64,0xd7,0x01,0x05,0x02,0x00]
+# W64-FAKE16: v_xor_b16 v5, v1, v2 ; encoding: [0x05,0x00,0x64,0xd7,0x01,0x05,0x02,0x00]
 0x05,0x00,0x64,0xd7,0x01,0x05,0x02,0x00
 
-# GFX11: v_xor_b16 v5, v255, v255 ; encoding: [0x05,0x00,0x64,0xd7,0xff,0xff,0x03,0x00]
+# W32-REAL16: v_xor_b16 v5.l, v255.l, v255.l ; encoding: [0x05,0x00,0x64,0xd7,0xff,0xff,0x03,0x00]
+# W32-FAKE16: v_xor_b16 v5, v255, v255 ; encoding: [0x05,0x00,0x64,0xd7,0xff,0xff,0x03,0x00]
+# W64-REAL16: v_xor_b16 v5.l, v255.l, v255.l ; encoding: [0x05,0x00,0x64,0xd7,0xff,0xff,0x03,0x00]
+# W64-FAKE16: v_xor_b16 v5, v255, v255 ; encoding: [0x05,0x00,0x64,0xd7,0xff,0xff,0x03,0x00]
 0x05,0x00,0x64,0xd7,0xff,0xff,0x03,0x00
 
-# GFX11: v_xor_b16 v5, s1, s2 ; encoding: [0x05,0x00,0x64,0xd7,0x01,0x04,0x00,0x00]
+# W32-REAL16: v_xor_b16 v5.l, s1, s2 ; encoding: [0x05,0x00,0x64,0xd7,0x01,0x04,0x00,0x00]
+# W32-FAKE16: v_xor_b16 v5, s1, s2 ; encoding: [0x05,0x00,0x64,0xd7,0x01,0x04,0x00,0x00]
+# W64-REAL16: v_xor_b16 v5.l, s1, s2 ; encoding: [0x05,0x00,0x64,0xd7,0x01,0x04,0x00,0x00]
+# W64-FAKE16: v_xor_b16 v5, s1, s2 ; encoding: [0x05,0x00,0x64,0xd7,0x01,0x04,0x00,0x00]
 0x05,0x00,0x64,0xd7,0x01,0x04,0x00,0x00
 
-# GFX11: v_xor_b16 v5, s105, s105 ; encoding: [0x05,0x00,0x64,0xd7,0x69,0xd2,0x00,0x00]
+# W32-REAL16: v_xor_b16 v5.l, s105, s105 ; encoding: [0x05,0x00,0x64,0xd7,0x69,0xd2,0x00,0x00]
+# W32-FAKE16: v_xor_b16 v5, s105, s105 ; encoding: [0x05,0x00,0x64,0xd7,0x69,0xd2,0x00,0x00]
+# W64-REAL16: v_xor_b16 v5.l, s105, s105 ; encoding: [0x05,0x00,0x64,0xd7,0x69,0xd2,0x00,0x00]
+# W64-FAKE16: v_xor_b16 v5, s105, s105 ; encoding: [0x05,0x00,0x64,0xd7,0x69,0xd2,0x00,0x00]
 0x05,0x00,0x64,0xd7,0x69,0xd2,0x00,0x00
 
-# GFX11: v_xor_b16 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x64,0xd7,0x6a,0xf6,0x00,0x00]
+# W32-REAL16: v_xor_b16 v5.l, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x64,0xd7,0x6a,0xf6,0x00,0x00]
+# W32-FAKE16: v_xor_b16 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x64,0xd7,0x6a,0xf6,0x00,0x00]
+# W64-REAL16: v_xor_b16 v5.l, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x64,0xd7,0x6a,0xf6,0x00,0x00]
+# W64-FAKE16: v_xor_b16 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x64,0xd7,0x6a,0xf6,0x00,0x00]
 0x05,0x00,0x64,0xd7,0x6a,0xf6,0x00,0x00
 
-# GFX11: v_xor_b16 v5, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x64,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+# W32-REAL16: v_xor_b16 v5.l, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x64,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+# W32-FAKE16: v_xor_b16 v5, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x64,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+# W64-REAL16: v_xor_b16 v5.l, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x64,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+# W64-FAKE16: v_xor_b16 v5, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x64,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
 0x05,0x00,0x64,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00
 
-# GFX11: v_xor_b16 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x64,0xd7,0x7b,0xfa,0x01,0x00]
+# W32-REAL16: v_xor_b16 v5.l, ttmp15, src_scc ; encoding: [0x05,0x00,0x64,0xd7,0x7b,0xfa,0x01,0x00]
+# W32-FAKE16: v_xor_b16 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x64,0xd7,0x7b,0xfa,0x01,0x00]
+# W64-REAL16: v_xor_b16 v5.l, ttmp15, src_scc ; encoding: [0x05,0x00,0x64,0xd7,0x7b,0xfa,0x01,0x00]
+# W64-FAKE16: v_xor_b16 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x64,0xd7,0x7b,0xfa,0x01,0x00]
 0x05,0x00,0x64,0xd7,0x7b,0xfa,0x01,0x00
 
-# GFX11: v_xor_b16 v5, m0, 0x3800
+# W32-REAL16: v_xor_b16 v5.l, m0, 0x3800 ; encoding: [0x05,0x00,0x64,0xd7,0x7d,0xfe,0x01,0x00,0x00,0x38,0x00,0x00]
+# W32-FAKE16: v_xor_b16 v5, m0, 0x3800 ; encoding: [0x05,0x00,0x64,0xd7,0x7d,0xfe,0x01,0x00,0x00,0x38,0x00,0x00]
+# W64-REAL16: v_xor_b16 v5.l, m0, 0x3800 ; encoding: [0x05,0x00,0x64,0xd7,0x7d,0xfe,0x01,0x00,0x00,0x38,0x00,0x00]
+# W64-FAKE16: v_xor_b16 v5, m0, 0x3800 ; encoding: [0x05,0x00,0x64,0xd7,0x7d,0xfe,0x01,0x00,0x00,0x38,0x00,0x00]
 0x05,0x00,0x64,0xd7,0x7d,0xe0,0x01,0x00
 
-# GFX11: v_xor_b16 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x64,0xd7,0x7e,0x82,0x01,0x00]
+# W32-REAL16: v_xor_b16 v5.l, exec_lo, -1 ; encoding: [0x05,0x00,0x64,0xd7,0x7e,0x82,0x01,0x00]
+# W32-FAKE16: v_xor_b16 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x64,0xd7,0x7e,0x82,0x01,0x00]
+# W64-REAL16: v_xor_b16 v5.l, exec_lo, -1 ; encoding: [0x05,0x00,0x64,0xd7,0x7e,0x82,0x01,0x00]
+# W64-FAKE16: v_xor_b16 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x64,0xd7,0x7e,0x82,0x01,0x00]
 0x05,0x00,0x64,0xd7,0x7e,0x82,0x01,0x00
 
-# GFX11: v_xor_b16 v5, exec_hi, null ; encoding: [0x05,0x00,0x64,0xd7,0x7f,0xf8,0x00,0x00]
+# W32-REAL16: v_xor_b16 v5.l, exec_hi, null ; encoding: [0x05,0x00,0x64,0xd7,0x7f,0xf8,0x00,0x00]
+# W32-FAKE16: v_xor_b16 v5, exec_hi, null ; encoding: [0x05,0x00,0x64,0xd7,0x7f,0xf8,0x00,0x00]
+# W64-REAL16: v_xor_b16 v5.l, exec_hi, null ; encoding: [0x05,0x00,0x64,0xd7,0x7f,0xf8,0x00,0x00]
+# W64-FAKE16: v_xor_b16 v5, exec_hi, null ; encoding: [0x05,0x00,0x64,0xd7,0x7f,0xf8,0x00,0x00]
 0x05,0x00,0x64,0xd7,0x7f,0xf8,0x00,0x00
 
-# GFX11: v_xor_b16 v5, null, exec_lo ; encoding: [0x05,0x00,0x64,0xd7,0x7c,0xfc,0x00,0x00]
+# W32-REAL16: v_xor_b16 v5.l, null, exec_lo ; encoding: [0x05,0x00,0x64,0xd7,0x7c,0xfc,0x00,0x00]
+# W32-FAKE16: v_xor_b16 v5, null, exec_lo ; encoding: [0x05,0x00,0x64,0xd7,0x7c,0xfc,0x00,0x00]
+# W64-REAL16: v_xor_b16 v5.l, null, exec_lo ; encoding: [0x05,0x00,0x64,0xd7,0x7c,0xfc,0x00,0x00]
+# W64-FAKE16: v_xor_b16 v5, null, exec_lo ; encoding: [0x05,0x00,0x64,0xd7,0x7c,0xfc,0x00,0x00]
 0x05,0x00,0x64,0xd7,0x7c,0xfc,0x00,0x00
 
-# GFX11: v_xor_b16 v5, -1, exec_hi ; encoding: [0x05,0x00,0x64,0xd7,0xc1,0xfe,0x00,0x00]
+# W32-REAL16: v_xor_b16 v5.l, -1, exec_hi ; encoding: [0x05,0x00,0x64,0xd7,0xc1,0xfe,0x00,0x00]
+# W32-FAKE16: v_xor_b16 v5, -1, exec_hi ; encoding: [0x05,0x00,0x64,0xd7,0xc1,0xfe,0x00,0x00]
+# W64-REAL16: v_xor_b16 v5.l, -1, exec_hi ; encoding: [0x05,0x00,0x64,0xd7,0xc1,0xfe,0x00,0x00]
+# W64-FAKE16: v_xor_b16 v5, -1, exec_hi ; encoding: [0x05,0x00,0x64,0xd7,0xc1,0xfe,0x00,0x00]
 0x05,0x00,0x64,0xd7,0xc1,0xfe,0x00,0x00
 
-# GFX11: v_xor_b16 v5, 0x3800, m0
+# W32-REAL16: v_xor_b16 v5.l, 0x3800, m0 ; encoding: [0x05,0x00,0x64,0xd7,0xff,0xfa,0x00,0x00,0x00,0x38,0x00,0x00]
+# W32-FAKE16: v_xor_b16 v5, 0x3800, m0 ; encoding: [0x05,0x00,0x64,0xd7,0xff,0xfa,0x00,0x00,0x00,0x38,0x00,0x00]
+# W64-REAL16: v_xor_b16 v5.l, 0x3800, m0 ; encoding: [0x05,0x00,0x64,0xd7,0xff,0xfa,0x00,0x00,0x00,0x38,0x00,0x00]
+# W64-FAKE16: v_xor_b16 v5, 0x3800, m0 ; encoding: [0x05,0x00,0x64,0xd7,0xff,0xfa,0x00,0x00,0x00,0x38,0x00,0x00]
 0x05,0x00,0x64,0xd7,0xf0,0xfa,0x00,0x00
 
-# GFX11: v_xor_b16 v5, src_scc, vcc_lo ; encoding: [0x05,0x00,0x64,0xd7,0xfd,0xd4,0x00,0x00]
+# W32-REAL16: v_xor_b16 v5.l, src_scc, vcc_lo ; encoding: [0x05,0x00,0x64,0xd7,0xfd,0xd4,0x00,0x00]
+# W32-FAKE16: v_xor_b16 v5, src_scc, vcc_lo ; encoding: [0x05,0x00,0x64,0xd7,0xfd,0xd4,0x00,0x00]
+# W64-REAL16: v_xor_b16 v5.l, src_scc, vcc_lo ; encoding: [0x05,0x00,0x64,0xd7,0xfd,0xd4,0x00,0x00]
+# W64-FAKE16: v_xor_b16 v5, src_scc, vcc_lo ; encoding: [0x05,0x00,0x64,0xd7,0xfd,0xd4,0x00,0x00]
 0x05,0x00,0x64,0xd7,0xfd,0xd4,0x00,0x00
 
-# GFX11: v_xor_b16 v255, 0xfe0b, vcc_hi ; encoding: [0xff,0x00,0x64,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+# W32-REAL16: v_xor_b16 v255.l, 0xfe0b, vcc_hi ; encoding: [0xff,0x00,0x64,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+# W32-FAKE16: v_xor_b16 v255, 0xfe0b, vcc_hi ; encoding: [0xff,0x00,0x64,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+# W64-REAL16: v_xor_b16 v255.l, 0xfe0b, vcc_hi ; encoding: [0xff,0x00,0x64,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+# W64-FAKE16: v_xor_b16 v255, 0xfe0b, vcc_hi ; encoding: [0xff,0x00,0x64,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
 0xff,0x00,0x64,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp16.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp16.txt
index 486243c450d67..bf3fa3bf65c74 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp16.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp16.txt
@@ -1,5 +1,8 @@
-# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX11,W32 %s
-# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX11,W64 %s
+; NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --version 5
+# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX11,W32,W32-REAL16 %s
+# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX11,W32,W32-FAKE16 %s
+# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,+real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX11,W64,W64-REAL16 %s
+# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,-real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX11,W64,W64-FAKE16 %s
 
 # GFX11: v_add3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x55,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
 0x05,0x00,0x55,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff
@@ -266,46 +269,88 @@
 # GFX11: v_alignbyte_b32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x17,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30]
 0xff,0x00,0x17,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30
 
-# GFX11: v_and_b16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W32-REAL16: v_and_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W32-FAKE16: v_and_b16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W64-REAL16: v_and_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W64-FAKE16: v_and_b16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
 0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff
 
-# GFX11: v_and_b16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W32-REAL16: v_and_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W32-FAKE16: v_and_b16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W64-REAL16: v_and_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W64-FAKE16: v_and_b16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
 0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff
 
-# GFX11: v_and_b16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W32-REAL16: v_and_b16_e64_dpp v5.l, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W32-FAKE16: v_and_b16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W64-REAL16: v_and_b16_e64_dpp v5.l, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W64-FAKE16: v_and_b16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
 0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff
 
-# GFX11: v_and_b16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W32-REAL16: v_and_b16_e64_dpp v5.l, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W32-FAKE16: v_and_b16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W64-REAL16: v_and_b16_e64_dpp v5.l, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W64-FAKE16: v_and_b16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
 0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff
 
-# GFX11: v_and_b16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W32-REAL16: v_and_b16_e64_dpp v5.l, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W32-FAKE16: v_and_b16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W64-REAL16: v_and_b16_e64_dpp v5.l, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W64-FAKE16: v_and_b16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
 0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff
 
-# GFX11: v_and_b16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W32-REAL16: v_and_b16_e64_dpp v5.l, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W32-FAKE16: v_and_b16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W64-REAL16: v_and_b16_e64_dpp v5.l, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W64-FAKE16: v_and_b16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
 0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff
 
-# GFX11: v_and_b16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W32-REAL16: v_and_b16_e64_dpp v5.l, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W32-FAKE16: v_and_b16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W64-REAL16: v_and_b16_e64_dpp v5.l, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W64-FAKE16: v_and_b16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
 0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff
 
-# GFX11: v_and_b16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W32-REAL16: v_and_b16_e64_dpp v5.l, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W32-FAKE16: v_and_b16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W64-REAL16: v_and_b16_e64_dpp v5.l, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W64-FAKE16: v_and_b16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
 0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff
 
-# GFX11: v_and_b16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W32-REAL16: v_and_b16_e64_dpp v5.l, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W32-FAKE16: v_and_b16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W64-REAL16: v_and_b16_e64_dpp v5.l, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W64-FAKE16: v_and_b16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
 0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff
 
-# GFX11: v_and_b16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W32-REAL16: v_and_b16_e64_dpp v5.l, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W32-FAKE16: v_and_b16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W64-REAL16: v_and_b16_e64_dpp v5.l, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W64-FAKE16: v_and_b16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
 0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff
 
-# GFX11: v_and_b16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W32-REAL16: v_and_b16_e64_dpp v5.l, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W32-FAKE16: v_and_b16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ;
encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +# W64-REAL16: v_and_b16_e64_dpp v5.l, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +# W64-FAKE16: v_and_b16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] 0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff -# GFX11: v_and_b16_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +# W32-REAL16: v_and_b16_e64_dpp v5.l, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +# W32-FAKE16: v_and_b16_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +# W64-REAL16: v_and_b16_e64_dpp v5.l, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +# W64-FAKE16: v_and_b16_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] 0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 -# GFX11: v_and_b16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] +# W32-REAL16: v_and_b16_e64_dpp v5.l, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] +# W32-FAKE16: v_and_b16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] +# W64-REAL16: v_and_b16_e64_dpp v5.l, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] +# W64-FAKE16: v_and_b16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] 0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 -# GFX11: v_and_b16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x62,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] +# W32-REAL16: v_and_b16_e64_dpp v255.l, v255.l, v255.l row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x62,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] +# W32-FAKE16: v_and_b16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x62,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] +# W64-REAL16: v_and_b16_e64_dpp v255.l, v255.l, v255.l row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x62,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] +# W64-FAKE16: v_and_b16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x62,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] 0xff,0x00,0x62,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 # GFX11: v_and_or_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x57,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] @@ -2637,46 +2682,88 @@ # GFX11: v_or3_b32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x58,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] 
0xff,0x00,0x58,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 -# GFX11: v_or_b16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +# W32-REAL16: v_or_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +# W32-FAKE16: v_or_b16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +# W64-REAL16: v_or_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +# W64-FAKE16: v_or_b16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] 0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff -# GFX11: v_or_b16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +# W32-REAL16: v_or_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +# W32-FAKE16: v_or_b16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +# W64-REAL16: v_or_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +# W64-FAKE16: v_or_b16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] 0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff -# GFX11: v_or_b16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +# W32-REAL16: v_or_b16_e64_dpp v5.l, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +# W32-FAKE16: v_or_b16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +# W64-REAL16: v_or_b16_e64_dpp v5.l, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +# W64-FAKE16: v_or_b16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] 0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff -# GFX11: v_or_b16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +# W32-REAL16: v_or_b16_e64_dpp v5.l, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +# W32-FAKE16: v_or_b16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +# W64-REAL16: v_or_b16_e64_dpp v5.l, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +# W64-FAKE16: v_or_b16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] 0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff -# GFX11: 
v_or_b16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +# W32-REAL16: v_or_b16_e64_dpp v5.l, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +# W32-FAKE16: v_or_b16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +# W64-REAL16: v_or_b16_e64_dpp v5.l, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +# W64-FAKE16: v_or_b16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] 0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff -# GFX11: v_or_b16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +# W32-REAL16: v_or_b16_e64_dpp v5.l, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +# W32-FAKE16: v_or_b16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +# W64-REAL16: v_or_b16_e64_dpp v5.l, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +# W64-FAKE16: v_or_b16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] 0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff -# GFX11: v_or_b16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +# W32-REAL16: v_or_b16_e64_dpp v5.l, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +# W32-FAKE16: v_or_b16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +# W64-REAL16: v_or_b16_e64_dpp v5.l, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +# W64-FAKE16: v_or_b16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] 0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff -# GFX11: v_or_b16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +# W32-REAL16: v_or_b16_e64_dpp v5.l, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +# W32-FAKE16: v_or_b16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +# W64-REAL16: v_or_b16_e64_dpp v5.l, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +# W64-FAKE16: v_or_b16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] 0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff -# GFX11: v_or_b16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +# W32-REAL16: v_or_b16_e64_dpp v5.l, v1.l, v2.l 
row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +# W32-FAKE16: v_or_b16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +# W64-REAL16: v_or_b16_e64_dpp v5.l, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +# W64-FAKE16: v_or_b16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] 0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff -# GFX11: v_or_b16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +# W32-REAL16: v_or_b16_e64_dpp v5.l, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +# W32-FAKE16: v_or_b16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +# W64-REAL16: v_or_b16_e64_dpp v5.l, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +# W64-FAKE16: v_or_b16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] 0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff -# GFX11: v_or_b16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +# W32-REAL16: v_or_b16_e64_dpp v5.l, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +# W32-FAKE16: v_or_b16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +# W64-REAL16: v_or_b16_e64_dpp v5.l, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +# W64-FAKE16: v_or_b16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] 0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff -# GFX11: v_or_b16_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +# W32-REAL16: v_or_b16_e64_dpp v5.l, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +# W32-FAKE16: v_or_b16_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +# W64-REAL16: v_or_b16_e64_dpp v5.l, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +# W64-FAKE16: v_or_b16_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] 0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 -# GFX11: v_or_b16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] +# W32-REAL16: v_or_b16_e64_dpp v5.l, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] +# W32-FAKE16: v_or_b16_e64_dpp v5, v1, v2 row_xmask:0 
row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] +# W64-REAL16: v_or_b16_e64_dpp v5.l, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] +# W64-FAKE16: v_or_b16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] 0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 -# GFX11: v_or_b16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x63,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] +# W32-REAL16: v_or_b16_e64_dpp v255.l, v255.l, v255.l row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x63,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] +# W32-FAKE16: v_or_b16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x63,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] +# W64-REAL16: v_or_b16_e64_dpp v255.l, v255.l, v255.l row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x63,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] +# W64-FAKE16: v_or_b16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x63,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] 0xff,0x00,0x63,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 # GFX11: v_perm_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x44,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] @@ -3125,46 +3212,88 @@ # GFX11: v_xor3_b32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x40,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] 0xff,0x00,0x40,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 -# GFX11: v_xor_b16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +# W32-REAL16: v_xor_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +# W32-FAKE16: v_xor_b16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +# W64-REAL16: v_xor_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +# W64-FAKE16: v_xor_b16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] 0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff -# GFX11: v_xor_b16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +# W32-REAL16: v_xor_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +# W32-FAKE16: v_xor_b16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +# W64-REAL16: v_xor_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +# W64-FAKE16: v_xor_b16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] 0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff -# GFX11: v_xor_b16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +# W32-REAL16: v_xor_b16_e64_dpp v5.l, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +# W32-FAKE16: v_xor_b16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +# W64-REAL16: v_xor_b16_e64_dpp v5.l, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +# W64-FAKE16: v_xor_b16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] 0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff -# GFX11: v_xor_b16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +# W32-REAL16: v_xor_b16_e64_dpp v5.l, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +# W32-FAKE16: v_xor_b16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +# W64-REAL16: v_xor_b16_e64_dpp v5.l, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +# W64-FAKE16: v_xor_b16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] 0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff -# GFX11: v_xor_b16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +# W32-REAL16: v_xor_b16_e64_dpp v5.l, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +# W32-FAKE16: v_xor_b16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +# W64-REAL16: v_xor_b16_e64_dpp v5.l, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +# W64-FAKE16: v_xor_b16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] 0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff -# GFX11: v_xor_b16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +# W32-REAL16: v_xor_b16_e64_dpp v5.l, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +# W32-FAKE16: v_xor_b16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +# W64-REAL16: v_xor_b16_e64_dpp v5.l, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +# W64-FAKE16: v_xor_b16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] 0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff -# GFX11: 
v_xor_b16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +# W32-REAL16: v_xor_b16_e64_dpp v5.l, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +# W32-FAKE16: v_xor_b16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +# W64-REAL16: v_xor_b16_e64_dpp v5.l, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +# W64-FAKE16: v_xor_b16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] 0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff -# GFX11: v_xor_b16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +# W32-REAL16: v_xor_b16_e64_dpp v5.l, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +# W32-FAKE16: v_xor_b16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +# W64-REAL16: v_xor_b16_e64_dpp v5.l, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +# W64-FAKE16: v_xor_b16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] 0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff -# GFX11: v_xor_b16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +# W32-REAL16: v_xor_b16_e64_dpp v5.l, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +# W32-FAKE16: v_xor_b16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +# W64-REAL16: v_xor_b16_e64_dpp v5.l, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +# W64-FAKE16: v_xor_b16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] 0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff -# GFX11: v_xor_b16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +# W32-REAL16: v_xor_b16_e64_dpp v5.l, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +# W32-FAKE16: v_xor_b16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +# W64-REAL16: v_xor_b16_e64_dpp v5.l, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +# W64-FAKE16: v_xor_b16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] 0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff -# GFX11: v_xor_b16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +# W32-REAL16: v_xor_b16_e64_dpp 
v5.l, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +# W32-FAKE16: v_xor_b16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +# W64-REAL16: v_xor_b16_e64_dpp v5.l, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +# W64-FAKE16: v_xor_b16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] 0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff -# GFX11: v_xor_b16_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +# W32-REAL16: v_xor_b16_e64_dpp v5.l, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +# W32-FAKE16: v_xor_b16_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +# W64-REAL16: v_xor_b16_e64_dpp v5.l, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +# W64-FAKE16: v_xor_b16_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] 0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 -# GFX11: v_xor_b16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] +# W32-REAL16: v_xor_b16_e64_dpp v5.l, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] +# W32-FAKE16: v_xor_b16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] +# W64-REAL16: v_xor_b16_e64_dpp v5.l, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] +# W64-FAKE16: v_xor_b16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] 0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 -# GFX11: v_xor_b16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x64,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] +# W32-REAL16: v_xor_b16_e64_dpp v255.l, v255.l, v255.l row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x64,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] +# W32-FAKE16: v_xor_b16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x64,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] +# W64-REAL16: v_xor_b16_e64_dpp v255.l, v255.l, v255.l row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x64,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] +# W64-FAKE16: v_xor_b16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x64,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] 0xff,0x00,0x64,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 # GFX11: v_add_nc_i16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] diff --git 
a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp8.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp8.txt index e88aad3312757..cdbf798fd99c7 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp8.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp8.txt @@ -1,5 +1,8 @@ -# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX11,W32 %s -# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX11,W64 %s +; NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --version 5 +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX11,W32,W32-REAL16 %s +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX11,W32,W32-FAKE16 %s +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,+real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX11,W64,W64-REAL16 %s +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,-real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX11,W64,W64-FAKE16 %s # GFX11: v_add3_u32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x55,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] 0x05,0x00,0x55,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 @@ -164,10 +167,16 @@ # GFX11: v_alignbyte_b32_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x17,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] 0xff,0x00,0x17,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00 -# GFX11: v_and_b16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x62,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W32-REAL16: v_and_b16_e64_dpp v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x62,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_and_b16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x62,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W64-REAL16: v_and_b16_e64_dpp v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x62,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_and_b16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x62,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x05,0x00,0x62,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 -# GFX11: v_and_b16_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x62,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] +# W32-REAL16: v_and_b16_e64_dpp v255.l, v255.l, v255.l dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x62,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] +# W32-FAKE16: v_and_b16_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x62,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] +# W64-REAL16: v_and_b16_e64_dpp v255.l, v255.l, v255.l dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x62,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] +# W64-FAKE16: v_and_b16_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x62,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] 0xff,0x00,0x62,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00 # GFX11: v_and_or_b32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x57,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] @@ -1599,10 +1608,16 @@ # GFX11: v_or3_b32_e64_dpp v255, v255, v255, 
src_scc dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x58,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] 0xff,0x00,0x58,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00 -# GFX11: v_or_b16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x63,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W32-REAL16: v_or_b16_e64_dpp v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x63,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_or_b16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x63,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W64-REAL16: v_or_b16_e64_dpp v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x63,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_or_b16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x63,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x05,0x00,0x63,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 -# GFX11: v_or_b16_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x63,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] +# W32-REAL16: v_or_b16_e64_dpp v255.l, v255.l, v255.l dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x63,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] +# W32-FAKE16: v_or_b16_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x63,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] +# W64-REAL16: v_or_b16_e64_dpp v255.l, v255.l, v255.l dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x63,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] +# W64-FAKE16: v_or_b16_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x63,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] 0xff,0x00,0x63,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00 # GFX11: v_perm_b32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x44,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] @@ -1901,10 +1916,16 @@ # GFX11: v_xor3_b32_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x40,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] 0xff,0x00,0x40,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00 -# GFX11: v_xor_b16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x64,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W32-REAL16: v_xor_b16_e64_dpp v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x64,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_xor_b16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x64,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W64-REAL16: v_xor_b16_e64_dpp v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x64,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_xor_b16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x64,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x05,0x00,0x64,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 -# GFX11: v_xor_b16_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x64,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] +# W32-REAL16: v_xor_b16_e64_dpp v255.l, v255.l, v255.l dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x64,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] +# W32-FAKE16: v_xor_b16_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x64,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] +# W64-REAL16: v_xor_b16_e64_dpp v255.l, v255.l, v255.l dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x64,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] +# W64-FAKE16: 
v_xor_b16_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x64,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] 0xff,0x00,0x64,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00 # GFX11: v_add_nc_i16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3.txt index 44cbe5f31b2cf..9b41b22b9012f 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3.txt @@ -1,5 +1,8 @@ -# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX12,W32 %s -# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX12,W64 %s +; NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --version 5 +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX12,W32,W32-REAL16 %s +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=-real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX12,W32,W32-FAKE16 %s +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,+real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX12,W64,W64-REAL16 %s +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,-real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX12,W64,W64-FAKE16 %s # GFX12: v_add3_u32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x55,0xd6,0x01,0x05,0x0e,0x00] 0x05,0x00,0x55,0xd6,0x01,0x05,0x0e,0x00 @@ -375,49 +378,94 @@ # GFX12: v_alignbyte_b32 v255, 0xaf123456, vcc_hi, null ; encoding: [0xff,0x00,0x17,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] 0xff,0x00,0x17,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf -# GFX12: v_and_b16 v5, v1, v2 ; encoding: [0x05,0x00,0x62,0xd7,0x01,0x05,0x02,0x00] +# W32-REAL16: v_and_b16 v5.l, v1.l, v2.l ; encoding: [0x05,0x00,0x62,0xd7,0x01,0x05,0x02,0x00] +# W32-FAKE16: v_and_b16 v5, v1, v2 ; encoding: [0x05,0x00,0x62,0xd7,0x01,0x05,0x02,0x00] +# W64-REAL16: v_and_b16 v5.l, v1.l, v2.l ; encoding: [0x05,0x00,0x62,0xd7,0x01,0x05,0x02,0x00] +# W64-FAKE16: v_and_b16 v5, v1, v2 ; encoding: [0x05,0x00,0x62,0xd7,0x01,0x05,0x02,0x00] 0x05,0x00,0x62,0xd7,0x01,0x05,0x02,0x00 -# GFX12: v_and_b16 v5, v255, v255 ; encoding: [0x05,0x00,0x62,0xd7,0xff,0xff,0x03,0x00] +# W32-REAL16: v_and_b16 v5.l, v255.l, v255.l ; encoding: [0x05,0x00,0x62,0xd7,0xff,0xff,0x03,0x00] +# W32-FAKE16: v_and_b16 v5, v255, v255 ; encoding: [0x05,0x00,0x62,0xd7,0xff,0xff,0x03,0x00] +# W64-REAL16: v_and_b16 v5.l, v255.l, v255.l ; encoding: [0x05,0x00,0x62,0xd7,0xff,0xff,0x03,0x00] +# W64-FAKE16: v_and_b16 v5, v255, v255 ; encoding: [0x05,0x00,0x62,0xd7,0xff,0xff,0x03,0x00] 0x05,0x00,0x62,0xd7,0xff,0xff,0x03,0x00 -# GFX12: v_and_b16 v5, s1, s2 ; encoding: [0x05,0x00,0x62,0xd7,0x01,0x04,0x00,0x00] +# W32-REAL16: v_and_b16 v5.l, s1, s2 ; encoding: [0x05,0x00,0x62,0xd7,0x01,0x04,0x00,0x00] +# W32-FAKE16: v_and_b16 v5, s1, s2 ; encoding: [0x05,0x00,0x62,0xd7,0x01,0x04,0x00,0x00] +# W64-REAL16: v_and_b16 v5.l, s1, s2 ; encoding: [0x05,0x00,0x62,0xd7,0x01,0x04,0x00,0x00] +# W64-FAKE16: v_and_b16 v5, s1, s2 ; encoding: [0x05,0x00,0x62,0xd7,0x01,0x04,0x00,0x00] 0x05,0x00,0x62,0xd7,0x01,0x04,0x00,0x00 -# GFX12: v_and_b16 v5, s105, s105 ; encoding: 
[0x05,0x00,0x62,0xd7,0x69,0xd2,0x00,0x00] +# W32-REAL16: v_and_b16 v5.l, s105, s105 ; encoding: [0x05,0x00,0x62,0xd7,0x69,0xd2,0x00,0x00] +# W32-FAKE16: v_and_b16 v5, s105, s105 ; encoding: [0x05,0x00,0x62,0xd7,0x69,0xd2,0x00,0x00] +# W64-REAL16: v_and_b16 v5.l, s105, s105 ; encoding: [0x05,0x00,0x62,0xd7,0x69,0xd2,0x00,0x00] +# W64-FAKE16: v_and_b16 v5, s105, s105 ; encoding: [0x05,0x00,0x62,0xd7,0x69,0xd2,0x00,0x00] 0x05,0x00,0x62,0xd7,0x69,0xd2,0x00,0x00 -# GFX12: v_and_b16 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x62,0xd7,0x6a,0xf6,0x00,0x00] +# W32-REAL16: v_and_b16 v5.l, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x62,0xd7,0x6a,0xf6,0x00,0x00] +# W32-FAKE16: v_and_b16 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x62,0xd7,0x6a,0xf6,0x00,0x00] +# W64-REAL16: v_and_b16 v5.l, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x62,0xd7,0x6a,0xf6,0x00,0x00] +# W64-FAKE16: v_and_b16 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x62,0xd7,0x6a,0xf6,0x00,0x00] 0x05,0x00,0x62,0xd7,0x6a,0xf6,0x00,0x00 -# GFX12: v_and_b16 v5, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x62,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] +# W32-REAL16: v_and_b16 v5.l, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x62,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] +# W32-FAKE16: v_and_b16 v5, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x62,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] +# W64-REAL16: v_and_b16 v5.l, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x62,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] +# W64-FAKE16: v_and_b16 v5, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x62,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] 0x05,0x00,0x62,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00 -# GFX12: v_and_b16 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x62,0xd7,0x7b,0xfa,0x01,0x00] +# W32-REAL16: v_and_b16 v5.l, ttmp15, src_scc ; encoding: [0x05,0x00,0x62,0xd7,0x7b,0xfa,0x01,0x00] +# W32-FAKE16: v_and_b16 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x62,0xd7,0x7b,0xfa,0x01,0x00] +# W64-REAL16: v_and_b16 v5.l, ttmp15, src_scc ; encoding: [0x05,0x00,0x62,0xd7,0x7b,0xfa,0x01,0x00] +# W64-FAKE16: v_and_b16 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x62,0xd7,0x7b,0xfa,0x01,0x00] 0x05,0x00,0x62,0xd7,0x7b,0xfa,0x01,0x00 -# GFX12: v_and_b16 v5, m0, 0x3800 +# W32-REAL16: v_and_b16 v5.l, m0, 0x3800 ; encoding: [0x05,0x00,0x62,0xd7,0x7d,0xfe,0x01,0x00,0x00,0x38,0x00,0x00] +# W32-FAKE16: v_and_b16 v5, m0, 0x3800 ; encoding: [0x05,0x00,0x62,0xd7,0x7d,0xfe,0x01,0x00,0x00,0x38,0x00,0x00] +# W64-REAL16: v_and_b16 v5.l, m0, 0x3800 ; encoding: [0x05,0x00,0x62,0xd7,0x7d,0xfe,0x01,0x00,0x00,0x38,0x00,0x00] +# W64-FAKE16: v_and_b16 v5, m0, 0x3800 ; encoding: [0x05,0x00,0x62,0xd7,0x7d,0xfe,0x01,0x00,0x00,0x38,0x00,0x00] 0x05,0x00,0x62,0xd7,0x7d,0xe0,0x01,0x00 -# GFX12: v_and_b16 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x62,0xd7,0x7e,0x82,0x01,0x00] +# W32-REAL16: v_and_b16 v5.l, exec_lo, -1 ; encoding: [0x05,0x00,0x62,0xd7,0x7e,0x82,0x01,0x00] +# W32-FAKE16: v_and_b16 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x62,0xd7,0x7e,0x82,0x01,0x00] +# W64-REAL16: v_and_b16 v5.l, exec_lo, -1 ; encoding: [0x05,0x00,0x62,0xd7,0x7e,0x82,0x01,0x00] +# W64-FAKE16: v_and_b16 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x62,0xd7,0x7e,0x82,0x01,0x00] 0x05,0x00,0x62,0xd7,0x7e,0x82,0x01,0x00 -# GFX12: v_and_b16 v5, exec_hi, null ; encoding: [0x05,0x00,0x62,0xd7,0x7f,0xf8,0x00,0x00] +# W32-REAL16: v_and_b16 v5.l, exec_hi, null ; encoding: [0x05,0x00,0x62,0xd7,0x7f,0xf8,0x00,0x00] +# W32-FAKE16: v_and_b16 v5, exec_hi, null ; encoding: [0x05,0x00,0x62,0xd7,0x7f,0xf8,0x00,0x00] +# W64-REAL16: v_and_b16 v5.l, exec_hi, 
null ; encoding: [0x05,0x00,0x62,0xd7,0x7f,0xf8,0x00,0x00] +# W64-FAKE16: v_and_b16 v5, exec_hi, null ; encoding: [0x05,0x00,0x62,0xd7,0x7f,0xf8,0x00,0x00] 0x05,0x00,0x62,0xd7,0x7f,0xf8,0x00,0x00 -# GFX12: v_and_b16 v5, null, exec_lo ; encoding: [0x05,0x00,0x62,0xd7,0x7c,0xfc,0x00,0x00] +# W32-REAL16: v_and_b16 v5.l, null, exec_lo ; encoding: [0x05,0x00,0x62,0xd7,0x7c,0xfc,0x00,0x00] +# W32-FAKE16: v_and_b16 v5, null, exec_lo ; encoding: [0x05,0x00,0x62,0xd7,0x7c,0xfc,0x00,0x00] +# W64-REAL16: v_and_b16 v5.l, null, exec_lo ; encoding: [0x05,0x00,0x62,0xd7,0x7c,0xfc,0x00,0x00] +# W64-FAKE16: v_and_b16 v5, null, exec_lo ; encoding: [0x05,0x00,0x62,0xd7,0x7c,0xfc,0x00,0x00] 0x05,0x00,0x62,0xd7,0x7c,0xfc,0x00,0x00 -# GFX12: v_and_b16 v5, -1, exec_hi ; encoding: [0x05,0x00,0x62,0xd7,0xc1,0xfe,0x00,0x00] +# W32-REAL16: v_and_b16 v5.l, -1, exec_hi ; encoding: [0x05,0x00,0x62,0xd7,0xc1,0xfe,0x00,0x00] +# W32-FAKE16: v_and_b16 v5, -1, exec_hi ; encoding: [0x05,0x00,0x62,0xd7,0xc1,0xfe,0x00,0x00] +# W64-REAL16: v_and_b16 v5.l, -1, exec_hi ; encoding: [0x05,0x00,0x62,0xd7,0xc1,0xfe,0x00,0x00] +# W64-FAKE16: v_and_b16 v5, -1, exec_hi ; encoding: [0x05,0x00,0x62,0xd7,0xc1,0xfe,0x00,0x00] 0x05,0x00,0x62,0xd7,0xc1,0xfe,0x00,0x00 -# GFX12: v_and_b16 v5, 0x3800, m0 +# W32-REAL16: v_and_b16 v5.l, 0x3800, m0 ; encoding: [0x05,0x00,0x62,0xd7,0xff,0xfa,0x00,0x00,0x00,0x38,0x00,0x00] +# W32-FAKE16: v_and_b16 v5, 0x3800, m0 ; encoding: [0x05,0x00,0x62,0xd7,0xff,0xfa,0x00,0x00,0x00,0x38,0x00,0x00] +# W64-REAL16: v_and_b16 v5.l, 0x3800, m0 ; encoding: [0x05,0x00,0x62,0xd7,0xff,0xfa,0x00,0x00,0x00,0x38,0x00,0x00] +# W64-FAKE16: v_and_b16 v5, 0x3800, m0 ; encoding: [0x05,0x00,0x62,0xd7,0xff,0xfa,0x00,0x00,0x00,0x38,0x00,0x00] 0x05,0x00,0x62,0xd7,0xf0,0xfa,0x00,0x00 -# GFX12: v_and_b16 v5, src_scc, vcc_lo ; encoding: [0x05,0x00,0x62,0xd7,0xfd,0xd4,0x00,0x00] +# W32-REAL16: v_and_b16 v5.l, src_scc, vcc_lo ; encoding: [0x05,0x00,0x62,0xd7,0xfd,0xd4,0x00,0x00] +# W32-FAKE16: v_and_b16 v5, src_scc, vcc_lo ; encoding: [0x05,0x00,0x62,0xd7,0xfd,0xd4,0x00,0x00] +# W64-REAL16: v_and_b16 v5.l, src_scc, vcc_lo ; encoding: [0x05,0x00,0x62,0xd7,0xfd,0xd4,0x00,0x00] +# W64-FAKE16: v_and_b16 v5, src_scc, vcc_lo ; encoding: [0x05,0x00,0x62,0xd7,0xfd,0xd4,0x00,0x00] 0x05,0x00,0x62,0xd7,0xfd,0xd4,0x00,0x00 -# GFX12: v_and_b16 v255, 0xfe0b, vcc_hi ; encoding: [0xff,0x00,0x62,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] +# W32-REAL16: v_and_b16 v255.l, 0xfe0b, vcc_hi ; encoding: [0xff,0x00,0x62,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] +# W32-FAKE16: v_and_b16 v255, 0xfe0b, vcc_hi ; encoding: [0xff,0x00,0x62,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] +# W64-REAL16: v_and_b16 v255.l, 0xfe0b, vcc_hi ; encoding: [0xff,0x00,0x62,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] +# W64-FAKE16: v_and_b16 v255, 0xfe0b, vcc_hi ; encoding: [0xff,0x00,0x62,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] 0xff,0x00,0x62,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00 # GFX12: v_and_or_b32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x57,0xd6,0x01,0x05,0x0e,0x00] @@ -4597,49 +4645,107 @@ # GFX12: v_or3_b32 v255, 0xaf123456, vcc_hi, null ; encoding: [0xff,0x00,0x58,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] 0xff,0x00,0x58,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf -# GFX12: v_or_b16 v5, v1, v2 ; encoding: [0x05,0x00,0x63,0xd7,0x01,0x05,0x02,0x00] + +# W32-REAL16: v_or_b16 v5.l, v1.l, v2.l ; encoding: [0x05,0x00,0x63,0xd7,0x01,0x05,0x02,0x00] +# W32-FAKE16: v_or_b16 v5, v1, v2 ; encoding: [0x05,0x00,0x63,0xd7,0x01,0x05,0x02,0x00] +# W64-REAL16: 
v_or_b16 v5.l, v1.l, v2.l ; encoding: [0x05,0x00,0x63,0xd7,0x01,0x05,0x02,0x00] +# W64-FAKE16: v_or_b16 v5, v1, v2 ; encoding: [0x05,0x00,0x63,0xd7,0x01,0x05,0x02,0x00] 0x05,0x00,0x63,0xd7,0x01,0x05,0x02,0x00 -# GFX12: v_or_b16 v5, v255, v255 ; encoding: [0x05,0x00,0x63,0xd7,0xff,0xff,0x03,0x00] + +# W32-REAL16: v_or_b16 v5.l, v255.l, v255.l ; encoding: [0x05,0x00,0x63,0xd7,0xff,0xff,0x03,0x00] +# W32-FAKE16: v_or_b16 v5, v255, v255 ; encoding: [0x05,0x00,0x63,0xd7,0xff,0xff,0x03,0x00] +# W64-REAL16: v_or_b16 v5.l, v255.l, v255.l ; encoding: [0x05,0x00,0x63,0xd7,0xff,0xff,0x03,0x00] +# W64-FAKE16: v_or_b16 v5, v255, v255 ; encoding: [0x05,0x00,0x63,0xd7,0xff,0xff,0x03,0x00] 0x05,0x00,0x63,0xd7,0xff,0xff,0x03,0x00 -# GFX12: v_or_b16 v5, s1, s2 ; encoding: [0x05,0x00,0x63,0xd7,0x01,0x04,0x00,0x00] + +# W32-REAL16: v_or_b16 v5.l, s1, s2 ; encoding: [0x05,0x00,0x63,0xd7,0x01,0x04,0x00,0x00] +# W32-FAKE16: v_or_b16 v5, s1, s2 ; encoding: [0x05,0x00,0x63,0xd7,0x01,0x04,0x00,0x00] +# W64-REAL16: v_or_b16 v5.l, s1, s2 ; encoding: [0x05,0x00,0x63,0xd7,0x01,0x04,0x00,0x00] +# W64-FAKE16: v_or_b16 v5, s1, s2 ; encoding: [0x05,0x00,0x63,0xd7,0x01,0x04,0x00,0x00] 0x05,0x00,0x63,0xd7,0x01,0x04,0x00,0x00 -# GFX12: v_or_b16 v5, s105, s105 ; encoding: [0x05,0x00,0x63,0xd7,0x69,0xd2,0x00,0x00] + +# W32-REAL16: v_or_b16 v5.l, s105, s105 ; encoding: [0x05,0x00,0x63,0xd7,0x69,0xd2,0x00,0x00] +# W32-FAKE16: v_or_b16 v5, s105, s105 ; encoding: [0x05,0x00,0x63,0xd7,0x69,0xd2,0x00,0x00] +# W64-REAL16: v_or_b16 v5.l, s105, s105 ; encoding: [0x05,0x00,0x63,0xd7,0x69,0xd2,0x00,0x00] +# W64-FAKE16: v_or_b16 v5, s105, s105 ; encoding: [0x05,0x00,0x63,0xd7,0x69,0xd2,0x00,0x00] 0x05,0x00,0x63,0xd7,0x69,0xd2,0x00,0x00 -# GFX12: v_or_b16 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x63,0xd7,0x6a,0xf6,0x00,0x00] + +# W32-REAL16: v_or_b16 v5.l, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x63,0xd7,0x6a,0xf6,0x00,0x00] +# W32-FAKE16: v_or_b16 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x63,0xd7,0x6a,0xf6,0x00,0x00] +# W64-REAL16: v_or_b16 v5.l, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x63,0xd7,0x6a,0xf6,0x00,0x00] +# W64-FAKE16: v_or_b16 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x63,0xd7,0x6a,0xf6,0x00,0x00] 0x05,0x00,0x63,0xd7,0x6a,0xf6,0x00,0x00 -# GFX12: v_or_b16 v5, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x63,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] + +# W32-REAL16: v_or_b16 v5.l, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x63,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] +# W32-FAKE16: v_or_b16 v5, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x63,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] +# W64-REAL16: v_or_b16 v5.l, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x63,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] +# W64-FAKE16: v_or_b16 v5, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x63,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] 0x05,0x00,0x63,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00 -# GFX12: v_or_b16 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x63,0xd7,0x7b,0xfa,0x01,0x00] + +# W32-REAL16: v_or_b16 v5.l, ttmp15, src_scc ; encoding: [0x05,0x00,0x63,0xd7,0x7b,0xfa,0x01,0x00] +# W32-FAKE16: v_or_b16 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x63,0xd7,0x7b,0xfa,0x01,0x00] +# W64-REAL16: v_or_b16 v5.l, ttmp15, src_scc ; encoding: [0x05,0x00,0x63,0xd7,0x7b,0xfa,0x01,0x00] +# W64-FAKE16: v_or_b16 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x63,0xd7,0x7b,0xfa,0x01,0x00] 0x05,0x00,0x63,0xd7,0x7b,0xfa,0x01,0x00 -# GFX12: v_or_b16 v5, m0, 0x3800 +# W32-REAL16: v_or_b16 v5.l, m0, 0x3800 ; encoding: 
[0x05,0x00,0x63,0xd7,0x7d,0xfe,0x01,0x00,0x00,0x38,0x00,0x00] +# W32-FAKE16: v_or_b16 v5, m0, 0x3800 ; encoding: [0x05,0x00,0x63,0xd7,0x7d,0xfe,0x01,0x00,0x00,0x38,0x00,0x00] +# W64-REAL16: v_or_b16 v5.l, m0, 0x3800 ; encoding: [0x05,0x00,0x63,0xd7,0x7d,0xfe,0x01,0x00,0x00,0x38,0x00,0x00] +# W64-FAKE16: v_or_b16 v5, m0, 0x3800 ; encoding: [0x05,0x00,0x63,0xd7,0x7d,0xfe,0x01,0x00,0x00,0x38,0x00,0x00] 0x05,0x00,0x63,0xd7,0x7d,0xe0,0x01,0x00 -# GFX12: v_or_b16 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x63,0xd7,0x7e,0x82,0x01,0x00] + +# W32-REAL16: v_or_b16 v5.l, exec_lo, -1 ; encoding: [0x05,0x00,0x63,0xd7,0x7e,0x82,0x01,0x00] +# W32-FAKE16: v_or_b16 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x63,0xd7,0x7e,0x82,0x01,0x00] +# W64-REAL16: v_or_b16 v5.l, exec_lo, -1 ; encoding: [0x05,0x00,0x63,0xd7,0x7e,0x82,0x01,0x00] +# W64-FAKE16: v_or_b16 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x63,0xd7,0x7e,0x82,0x01,0x00] 0x05,0x00,0x63,0xd7,0x7e,0x82,0x01,0x00 -# GFX12: v_or_b16 v5, exec_hi, null ; encoding: [0x05,0x00,0x63,0xd7,0x7f,0xf8,0x00,0x00] + +# W32-REAL16: v_or_b16 v5.l, exec_hi, null ; encoding: [0x05,0x00,0x63,0xd7,0x7f,0xf8,0x00,0x00] +# W32-FAKE16: v_or_b16 v5, exec_hi, null ; encoding: [0x05,0x00,0x63,0xd7,0x7f,0xf8,0x00,0x00] +# W64-REAL16: v_or_b16 v5.l, exec_hi, null ; encoding: [0x05,0x00,0x63,0xd7,0x7f,0xf8,0x00,0x00] +# W64-FAKE16: v_or_b16 v5, exec_hi, null ; encoding: [0x05,0x00,0x63,0xd7,0x7f,0xf8,0x00,0x00] 0x05,0x00,0x63,0xd7,0x7f,0xf8,0x00,0x00 -# GFX12: v_or_b16 v5, null, exec_lo ; encoding: [0x05,0x00,0x63,0xd7,0x7c,0xfc,0x00,0x00] + +# W32-REAL16: v_or_b16 v5.l, null, exec_lo ; encoding: [0x05,0x00,0x63,0xd7,0x7c,0xfc,0x00,0x00] +# W32-FAKE16: v_or_b16 v5, null, exec_lo ; encoding: [0x05,0x00,0x63,0xd7,0x7c,0xfc,0x00,0x00] +# W64-REAL16: v_or_b16 v5.l, null, exec_lo ; encoding: [0x05,0x00,0x63,0xd7,0x7c,0xfc,0x00,0x00] +# W64-FAKE16: v_or_b16 v5, null, exec_lo ; encoding: [0x05,0x00,0x63,0xd7,0x7c,0xfc,0x00,0x00] 0x05,0x00,0x63,0xd7,0x7c,0xfc,0x00,0x00 -# GFX12: v_or_b16 v5, -1, exec_hi ; encoding: [0x05,0x00,0x63,0xd7,0xc1,0xfe,0x00,0x00] + +# W32-REAL16: v_or_b16 v5.l, -1, exec_hi ; encoding: [0x05,0x00,0x63,0xd7,0xc1,0xfe,0x00,0x00] +# W32-FAKE16: v_or_b16 v5, -1, exec_hi ; encoding: [0x05,0x00,0x63,0xd7,0xc1,0xfe,0x00,0x00] +# W64-REAL16: v_or_b16 v5.l, -1, exec_hi ; encoding: [0x05,0x00,0x63,0xd7,0xc1,0xfe,0x00,0x00] +# W64-FAKE16: v_or_b16 v5, -1, exec_hi ; encoding: [0x05,0x00,0x63,0xd7,0xc1,0xfe,0x00,0x00] 0x05,0x00,0x63,0xd7,0xc1,0xfe,0x00,0x00 -# GFX12: v_or_b16 v5, 0x3800, m0 +# W32-REAL16: v_or_b16 v5.l, 0x3800, m0 ; encoding: [0x05,0x00,0x63,0xd7,0xff,0xfa,0x00,0x00,0x00,0x38,0x00,0x00] +# W32-FAKE16: v_or_b16 v5, 0x3800, m0 ; encoding: [0x05,0x00,0x63,0xd7,0xff,0xfa,0x00,0x00,0x00,0x38,0x00,0x00] +# W64-REAL16: v_or_b16 v5.l, 0x3800, m0 ; encoding: [0x05,0x00,0x63,0xd7,0xff,0xfa,0x00,0x00,0x00,0x38,0x00,0x00] +# W64-FAKE16: v_or_b16 v5, 0x3800, m0 ; encoding: [0x05,0x00,0x63,0xd7,0xff,0xfa,0x00,0x00,0x00,0x38,0x00,0x00] 0x05,0x00,0x63,0xd7,0xf0,0xfa,0x00,0x00 -# GFX12: v_or_b16 v5, src_scc, vcc_lo ; encoding: [0x05,0x00,0x63,0xd7,0xfd,0xd4,0x00,0x00] + +# W32-REAL16: v_or_b16 v5.l, src_scc, vcc_lo ; encoding: [0x05,0x00,0x63,0xd7,0xfd,0xd4,0x00,0x00] +# W32-FAKE16: v_or_b16 v5, src_scc, vcc_lo ; encoding: [0x05,0x00,0x63,0xd7,0xfd,0xd4,0x00,0x00] +# W64-REAL16: v_or_b16 v5.l, src_scc, vcc_lo ; encoding: [0x05,0x00,0x63,0xd7,0xfd,0xd4,0x00,0x00] +# W64-FAKE16: v_or_b16 v5, src_scc, vcc_lo ; encoding: [0x05,0x00,0x63,0xd7,0xfd,0xd4,0x00,0x00] 
0x05,0x00,0x63,0xd7,0xfd,0xd4,0x00,0x00 -# GFX12: v_or_b16 v255, 0xfe0b, vcc_hi ; encoding: [0xff,0x00,0x63,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] + +# W32-REAL16: v_or_b16 v255.l, 0xfe0b, vcc_hi ; encoding: [0xff,0x00,0x63,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] +# W32-FAKE16: v_or_b16 v255, 0xfe0b, vcc_hi ; encoding: [0xff,0x00,0x63,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] +# W64-REAL16: v_or_b16 v255.l, 0xfe0b, vcc_hi ; encoding: [0xff,0x00,0x63,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] +# W64-FAKE16: v_or_b16 v255, 0xfe0b, vcc_hi ; encoding: [0xff,0x00,0x63,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] 0xff,0x00,0x63,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00 # GFX12: v_pack_b32_f16 v5, v1, v2 ; encoding: [0x05,0x00,0x11,0xd7,0x01,0x05,0x02,0x00] @@ -5555,49 +5661,107 @@ # GFX12: v_xor3_b32 v255, 0xaf123456, vcc_hi, null ; encoding: [0xff,0x00,0x40,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] 0xff,0x00,0x40,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf -# GFX12: v_xor_b16 v5, v1, v2 ; encoding: [0x05,0x00,0x64,0xd7,0x01,0x05,0x02,0x00] + +# W32-REAL16: v_xor_b16 v5.l, v1.l, v2.l ; encoding: [0x05,0x00,0x64,0xd7,0x01,0x05,0x02,0x00] +# W32-FAKE16: v_xor_b16 v5, v1, v2 ; encoding: [0x05,0x00,0x64,0xd7,0x01,0x05,0x02,0x00] +# W64-REAL16: v_xor_b16 v5.l, v1.l, v2.l ; encoding: [0x05,0x00,0x64,0xd7,0x01,0x05,0x02,0x00] +# W64-FAKE16: v_xor_b16 v5, v1, v2 ; encoding: [0x05,0x00,0x64,0xd7,0x01,0x05,0x02,0x00] 0x05,0x00,0x64,0xd7,0x01,0x05,0x02,0x00 -# GFX12: v_xor_b16 v5, v255, v255 ; encoding: [0x05,0x00,0x64,0xd7,0xff,0xff,0x03,0x00] + +# W32-REAL16: v_xor_b16 v5.l, v255.l, v255.l ; encoding: [0x05,0x00,0x64,0xd7,0xff,0xff,0x03,0x00] +# W32-FAKE16: v_xor_b16 v5, v255, v255 ; encoding: [0x05,0x00,0x64,0xd7,0xff,0xff,0x03,0x00] +# W64-REAL16: v_xor_b16 v5.l, v255.l, v255.l ; encoding: [0x05,0x00,0x64,0xd7,0xff,0xff,0x03,0x00] +# W64-FAKE16: v_xor_b16 v5, v255, v255 ; encoding: [0x05,0x00,0x64,0xd7,0xff,0xff,0x03,0x00] 0x05,0x00,0x64,0xd7,0xff,0xff,0x03,0x00 -# GFX12: v_xor_b16 v5, s1, s2 ; encoding: [0x05,0x00,0x64,0xd7,0x01,0x04,0x00,0x00] + +# W32-REAL16: v_xor_b16 v5.l, s1, s2 ; encoding: [0x05,0x00,0x64,0xd7,0x01,0x04,0x00,0x00] +# W32-FAKE16: v_xor_b16 v5, s1, s2 ; encoding: [0x05,0x00,0x64,0xd7,0x01,0x04,0x00,0x00] +# W64-REAL16: v_xor_b16 v5.l, s1, s2 ; encoding: [0x05,0x00,0x64,0xd7,0x01,0x04,0x00,0x00] +# W64-FAKE16: v_xor_b16 v5, s1, s2 ; encoding: [0x05,0x00,0x64,0xd7,0x01,0x04,0x00,0x00] 0x05,0x00,0x64,0xd7,0x01,0x04,0x00,0x00 -# GFX12: v_xor_b16 v5, s105, s105 ; encoding: [0x05,0x00,0x64,0xd7,0x69,0xd2,0x00,0x00] + +# W32-REAL16: v_xor_b16 v5.l, s105, s105 ; encoding: [0x05,0x00,0x64,0xd7,0x69,0xd2,0x00,0x00] +# W32-FAKE16: v_xor_b16 v5, s105, s105 ; encoding: [0x05,0x00,0x64,0xd7,0x69,0xd2,0x00,0x00] +# W64-REAL16: v_xor_b16 v5.l, s105, s105 ; encoding: [0x05,0x00,0x64,0xd7,0x69,0xd2,0x00,0x00] +# W64-FAKE16: v_xor_b16 v5, s105, s105 ; encoding: [0x05,0x00,0x64,0xd7,0x69,0xd2,0x00,0x00] 0x05,0x00,0x64,0xd7,0x69,0xd2,0x00,0x00 -# GFX12: v_xor_b16 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x64,0xd7,0x6a,0xf6,0x00,0x00] + +# W32-REAL16: v_xor_b16 v5.l, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x64,0xd7,0x6a,0xf6,0x00,0x00] +# W32-FAKE16: v_xor_b16 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x64,0xd7,0x6a,0xf6,0x00,0x00] +# W64-REAL16: v_xor_b16 v5.l, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x64,0xd7,0x6a,0xf6,0x00,0x00] +# W64-FAKE16: v_xor_b16 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x64,0xd7,0x6a,0xf6,0x00,0x00] 0x05,0x00,0x64,0xd7,0x6a,0xf6,0x00,0x00 -# 
GFX12: v_xor_b16 v5, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x64,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] + +# W32-REAL16: v_xor_b16 v5.l, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x64,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] +# W32-FAKE16: v_xor_b16 v5, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x64,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] +# W64-REAL16: v_xor_b16 v5.l, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x64,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] +# W64-FAKE16: v_xor_b16 v5, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x64,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] 0x05,0x00,0x64,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00 -# GFX12: v_xor_b16 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x64,0xd7,0x7b,0xfa,0x01,0x00] + +# W32-REAL16: v_xor_b16 v5.l, ttmp15, src_scc ; encoding: [0x05,0x00,0x64,0xd7,0x7b,0xfa,0x01,0x00] +# W32-FAKE16: v_xor_b16 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x64,0xd7,0x7b,0xfa,0x01,0x00] +# W64-REAL16: v_xor_b16 v5.l, ttmp15, src_scc ; encoding: [0x05,0x00,0x64,0xd7,0x7b,0xfa,0x01,0x00] +# W64-FAKE16: v_xor_b16 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x64,0xd7,0x7b,0xfa,0x01,0x00] 0x05,0x00,0x64,0xd7,0x7b,0xfa,0x01,0x00 -# GFX12: v_xor_b16 v5, m0, 0x3800 +# W32-REAL16: v_xor_b16 v5.l, m0, 0x3800 ; encoding: [0x05,0x00,0x64,0xd7,0x7d,0xfe,0x01,0x00,0x00,0x38,0x00,0x00] +# W32-FAKE16: v_xor_b16 v5, m0, 0x3800 ; encoding: [0x05,0x00,0x64,0xd7,0x7d,0xfe,0x01,0x00,0x00,0x38,0x00,0x00] +# W64-REAL16: v_xor_b16 v5.l, m0, 0x3800 ; encoding: [0x05,0x00,0x64,0xd7,0x7d,0xfe,0x01,0x00,0x00,0x38,0x00,0x00] +# W64-FAKE16: v_xor_b16 v5, m0, 0x3800 ; encoding: [0x05,0x00,0x64,0xd7,0x7d,0xfe,0x01,0x00,0x00,0x38,0x00,0x00] 0x05,0x00,0x64,0xd7,0x7d,0xe0,0x01,0x00 -# GFX12: v_xor_b16 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x64,0xd7,0x7e,0x82,0x01,0x00] + +# W32-REAL16: v_xor_b16 v5.l, exec_lo, -1 ; encoding: [0x05,0x00,0x64,0xd7,0x7e,0x82,0x01,0x00] +# W32-FAKE16: v_xor_b16 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x64,0xd7,0x7e,0x82,0x01,0x00] +# W64-REAL16: v_xor_b16 v5.l, exec_lo, -1 ; encoding: [0x05,0x00,0x64,0xd7,0x7e,0x82,0x01,0x00] +# W64-FAKE16: v_xor_b16 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x64,0xd7,0x7e,0x82,0x01,0x00] 0x05,0x00,0x64,0xd7,0x7e,0x82,0x01,0x00 -# GFX12: v_xor_b16 v5, exec_hi, null ; encoding: [0x05,0x00,0x64,0xd7,0x7f,0xf8,0x00,0x00] + +# W32-REAL16: v_xor_b16 v5.l, exec_hi, null ; encoding: [0x05,0x00,0x64,0xd7,0x7f,0xf8,0x00,0x00] +# W32-FAKE16: v_xor_b16 v5, exec_hi, null ; encoding: [0x05,0x00,0x64,0xd7,0x7f,0xf8,0x00,0x00] +# W64-REAL16: v_xor_b16 v5.l, exec_hi, null ; encoding: [0x05,0x00,0x64,0xd7,0x7f,0xf8,0x00,0x00] +# W64-FAKE16: v_xor_b16 v5, exec_hi, null ; encoding: [0x05,0x00,0x64,0xd7,0x7f,0xf8,0x00,0x00] 0x05,0x00,0x64,0xd7,0x7f,0xf8,0x00,0x00 -# GFX12: v_xor_b16 v5, null, exec_lo ; encoding: [0x05,0x00,0x64,0xd7,0x7c,0xfc,0x00,0x00] + +# W32-REAL16: v_xor_b16 v5.l, null, exec_lo ; encoding: [0x05,0x00,0x64,0xd7,0x7c,0xfc,0x00,0x00] +# W32-FAKE16: v_xor_b16 v5, null, exec_lo ; encoding: [0x05,0x00,0x64,0xd7,0x7c,0xfc,0x00,0x00] +# W64-REAL16: v_xor_b16 v5.l, null, exec_lo ; encoding: [0x05,0x00,0x64,0xd7,0x7c,0xfc,0x00,0x00] +# W64-FAKE16: v_xor_b16 v5, null, exec_lo ; encoding: [0x05,0x00,0x64,0xd7,0x7c,0xfc,0x00,0x00] 0x05,0x00,0x64,0xd7,0x7c,0xfc,0x00,0x00 -# GFX12: v_xor_b16 v5, -1, exec_hi ; encoding: [0x05,0x00,0x64,0xd7,0xc1,0xfe,0x00,0x00] + +# W32-REAL16: v_xor_b16 v5.l, -1, exec_hi ; encoding: [0x05,0x00,0x64,0xd7,0xc1,0xfe,0x00,0x00] +# W32-FAKE16: v_xor_b16 v5, -1, exec_hi ; encoding: 
[0x05,0x00,0x64,0xd7,0xc1,0xfe,0x00,0x00] +# W64-REAL16: v_xor_b16 v5.l, -1, exec_hi ; encoding: [0x05,0x00,0x64,0xd7,0xc1,0xfe,0x00,0x00] +# W64-FAKE16: v_xor_b16 v5, -1, exec_hi ; encoding: [0x05,0x00,0x64,0xd7,0xc1,0xfe,0x00,0x00] 0x05,0x00,0x64,0xd7,0xc1,0xfe,0x00,0x00 -# GFX12: v_xor_b16 v5, 0x3800, m0 +# W32-REAL16: v_xor_b16 v5.l, 0x3800, m0 ; encoding: [0x05,0x00,0x64,0xd7,0xff,0xfa,0x00,0x00,0x00,0x38,0x00,0x00] +# W32-FAKE16: v_xor_b16 v5, 0x3800, m0 ; encoding: [0x05,0x00,0x64,0xd7,0xff,0xfa,0x00,0x00,0x00,0x38,0x00,0x00] +# W64-REAL16: v_xor_b16 v5.l, 0x3800, m0 ; encoding: [0x05,0x00,0x64,0xd7,0xff,0xfa,0x00,0x00,0x00,0x38,0x00,0x00] +# W64-FAKE16: v_xor_b16 v5, 0x3800, m0 ; encoding: [0x05,0x00,0x64,0xd7,0xff,0xfa,0x00,0x00,0x00,0x38,0x00,0x00] 0x05,0x00,0x64,0xd7,0xf0,0xfa,0x00,0x00 -# GFX12: v_xor_b16 v5, src_scc, vcc_lo ; encoding: [0x05,0x00,0x64,0xd7,0xfd,0xd4,0x00,0x00] + +# W32-REAL16: v_xor_b16 v5.l, src_scc, vcc_lo ; encoding: [0x05,0x00,0x64,0xd7,0xfd,0xd4,0x00,0x00] +# W32-FAKE16: v_xor_b16 v5, src_scc, vcc_lo ; encoding: [0x05,0x00,0x64,0xd7,0xfd,0xd4,0x00,0x00] +# W64-REAL16: v_xor_b16 v5.l, src_scc, vcc_lo ; encoding: [0x05,0x00,0x64,0xd7,0xfd,0xd4,0x00,0x00] +# W64-FAKE16: v_xor_b16 v5, src_scc, vcc_lo ; encoding: [0x05,0x00,0x64,0xd7,0xfd,0xd4,0x00,0x00] 0x05,0x00,0x64,0xd7,0xfd,0xd4,0x00,0x00 -# GFX12: v_xor_b16 v255, 0xfe0b, vcc_hi ; encoding: [0xff,0x00,0x64,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] + +# W32-REAL16: v_xor_b16 v255.l, 0xfe0b, vcc_hi ; encoding: [0xff,0x00,0x64,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] +# W32-FAKE16: v_xor_b16 v255, 0xfe0b, vcc_hi ; encoding: [0xff,0x00,0x64,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] +# W64-REAL16: v_xor_b16 v255.l, 0xfe0b, vcc_hi ; encoding: [0xff,0x00,0x64,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] +# W64-FAKE16: v_xor_b16 v255, 0xfe0b, vcc_hi ; encoding: [0xff,0x00,0x64,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] 0xff,0x00,0x64,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00 # GFX12: v_minimum_f32 v255, -|0xaf123456|, -|vcc_hi| ; encoding: [0xff,0x03,0x65,0xd7,0xff,0xd6,0x00,0x60,0x56,0x34,0x12,0xaf] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_dpp16.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_dpp16.txt index f6bb2e4a55282..f9efef4f4ebc3 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_dpp16.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_dpp16.txt @@ -1,5 +1,8 @@ -# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX12,W32 %s -# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX12,W64 %s +; NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --version 5 +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX12,W32,W32-REAL16 %s +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=-real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX12,W32,W32-FAKE16 %s +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,+real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX12,W64,W64-REAL16 %s +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,-real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX12,W64,W64-FAKE16 %s # GFX12: v_add3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x55,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] 0x05,0x00,0x55,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff @@ -281,46 +284,88 @@ # GFX12: v_alignbyte_b32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x17,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] 0xff,0x00,0x17,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 -# GFX12: v_and_b16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +# W32-REAL16: v_and_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +# W32-FAKE16: v_and_b16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +# W64-REAL16: v_and_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +# W64-FAKE16: v_and_b16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] 0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff -# GFX12: v_and_b16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +# W32-REAL16: v_and_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +# W32-FAKE16: v_and_b16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +# W64-REAL16: v_and_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +# W64-FAKE16: v_and_b16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] 0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff -# GFX12: v_and_b16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +# W32-REAL16: v_and_b16_e64_dpp v5.l, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +# W32-FAKE16: v_and_b16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +# W64-REAL16: v_and_b16_e64_dpp v5.l, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +# W64-FAKE16: v_and_b16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] 0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff -# GFX12: v_and_b16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +# W32-REAL16: v_and_b16_e64_dpp v5.l, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +# W32-FAKE16: v_and_b16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +# W64-REAL16: v_and_b16_e64_dpp v5.l, v1.l, 
v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +# W64-FAKE16: v_and_b16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] 0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff -# GFX12: v_and_b16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +# W32-REAL16: v_and_b16_e64_dpp v5.l, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +# W32-FAKE16: v_and_b16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +# W64-REAL16: v_and_b16_e64_dpp v5.l, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +# W64-FAKE16: v_and_b16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] 0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff -# GFX12: v_and_b16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +# W32-REAL16: v_and_b16_e64_dpp v5.l, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +# W32-FAKE16: v_and_b16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +# W64-REAL16: v_and_b16_e64_dpp v5.l, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +# W64-FAKE16: v_and_b16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] 0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff -# GFX12: v_and_b16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +# W32-REAL16: v_and_b16_e64_dpp v5.l, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +# W32-FAKE16: v_and_b16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +# W64-REAL16: v_and_b16_e64_dpp v5.l, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +# W64-FAKE16: v_and_b16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] 0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff -# GFX12: v_and_b16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +# W32-REAL16: v_and_b16_e64_dpp v5.l, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +# W32-FAKE16: v_and_b16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +# W64-REAL16: v_and_b16_e64_dpp v5.l, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +# W64-FAKE16: v_and_b16_e64_dpp v5, v1, v2 
row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] 0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff -# GFX12: v_and_b16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +# W32-REAL16: v_and_b16_e64_dpp v5.l, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +# W32-FAKE16: v_and_b16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +# W64-REAL16: v_and_b16_e64_dpp v5.l, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +# W64-FAKE16: v_and_b16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] 0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff -# GFX12: v_and_b16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +# W32-REAL16: v_and_b16_e64_dpp v5.l, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +# W32-FAKE16: v_and_b16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +# W64-REAL16: v_and_b16_e64_dpp v5.l, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +# W64-FAKE16: v_and_b16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] 0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff -# GFX12: v_and_b16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +# W32-REAL16: v_and_b16_e64_dpp v5.l, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +# W32-FAKE16: v_and_b16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +# W64-REAL16: v_and_b16_e64_dpp v5.l, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +# W64-FAKE16: v_and_b16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] 0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff -# GFX12: v_and_b16_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +# W32-REAL16: v_and_b16_e64_dpp v5.l, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +# W32-FAKE16: v_and_b16_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +# W64-REAL16: v_and_b16_e64_dpp v5.l, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +# W64-FAKE16: v_and_b16_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] 
0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 -# GFX12: v_and_b16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] +# W32-REAL16: v_and_b16_e64_dpp v5.l, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] +# W32-FAKE16: v_and_b16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] +# W64-REAL16: v_and_b16_e64_dpp v5.l, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] +# W64-FAKE16: v_and_b16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] 0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 -# GFX12: v_and_b16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x62,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] +# W32-REAL16: v_and_b16_e64_dpp v255.l, v255.l, v255.l row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x62,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] +# W32-FAKE16: v_and_b16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x62,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] +# W64-REAL16: v_and_b16_e64_dpp v255.l, v255.l, v255.l row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x62,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] +# W64-FAKE16: v_and_b16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x62,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] 0xff,0x00,0x62,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 # GFX12: v_and_or_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x57,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] @@ -867,7 +912,7 @@ # GFX12: v_cvt_pk_bf8_f32_e64_dpp v1, -v2, |v3| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd ; encoding: [0x01,0x02,0x6a,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0xed] 0x01,0x02,0x6a,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0xed -# GFX12: v_cvt_pk_bf8_f32 v1, -v2, |v3| ; encoding: [0x01,0x02,0x6a,0xd7,0x02,0x07,0x02,0x20] +# GFX12: v_cvt_pk_bf8_f32 v1, -v2, |v3| ; encoding: [0x01,0x02,0x6a,0xd7,0x02,0x07,0x02,0x20] 0x01,0x02,0x6a,0xd7,0x02,0x07,0x02,0x20 # GFX12: v_cvt_pk_bf8_f32_e64_dpp v6, -v2, |v3| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd ; encoding: [0x06,0x02,0x6a,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0xed] @@ -894,7 +939,7 @@ # GFX12: v_cvt_pk_fp8_f32_e64_dpp v1, -v2, |v3| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd ; encoding: [0x01,0x02,0x69,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0xed] 0x01,0x02,0x69,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0xed -# GFX12: v_cvt_pk_fp8_f32 v1, -v2, |v3| ; encoding: [0x01,0x02,0x69,0xd7,0x02,0x07,0x02,0x20] +# GFX12: v_cvt_pk_fp8_f32 v1, -v2, |v3| ; encoding: [0x01,0x02,0x69,0xd7,0x02,0x07,0x02,0x20] 0x01,0x02,0x69,0xd7,0x02,0x07,0x02,0x20 # GFX12: v_cvt_pk_fp8_f32_e64_dpp v6, -v2, |v3| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd ; encoding: [0x06,0x02,0x69,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0xed] @@ -2883,46 +2928,88 @@ # GFX12: v_or3_b32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: 
[0xff,0x00,0x58,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] 0xff,0x00,0x58,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 -# GFX12: v_or_b16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +# W32-REAL16: v_or_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +# W32-FAKE16: v_or_b16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +# W64-REAL16: v_or_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +# W64-FAKE16: v_or_b16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] 0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff -# GFX12: v_or_b16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +# W32-REAL16: v_or_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +# W32-FAKE16: v_or_b16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +# W64-REAL16: v_or_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +# W64-FAKE16: v_or_b16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] 0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff -# GFX12: v_or_b16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +# W32-REAL16: v_or_b16_e64_dpp v5.l, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +# W32-FAKE16: v_or_b16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +# W64-REAL16: v_or_b16_e64_dpp v5.l, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +# W64-FAKE16: v_or_b16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] 0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff -# GFX12: v_or_b16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +# W32-REAL16: v_or_b16_e64_dpp v5.l, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +# W32-FAKE16: v_or_b16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +# W64-REAL16: v_or_b16_e64_dpp v5.l, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +# W64-FAKE16: v_or_b16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] 
0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff -# GFX12: v_or_b16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +# W32-REAL16: v_or_b16_e64_dpp v5.l, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +# W32-FAKE16: v_or_b16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +# W64-REAL16: v_or_b16_e64_dpp v5.l, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +# W64-FAKE16: v_or_b16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] 0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff -# GFX12: v_or_b16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +# W32-REAL16: v_or_b16_e64_dpp v5.l, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +# W32-FAKE16: v_or_b16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +# W64-REAL16: v_or_b16_e64_dpp v5.l, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +# W64-FAKE16: v_or_b16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] 0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff -# GFX12: v_or_b16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +# W32-REAL16: v_or_b16_e64_dpp v5.l, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +# W32-FAKE16: v_or_b16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +# W64-REAL16: v_or_b16_e64_dpp v5.l, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +# W64-FAKE16: v_or_b16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] 0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff -# GFX12: v_or_b16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +# W32-REAL16: v_or_b16_e64_dpp v5.l, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +# W32-FAKE16: v_or_b16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +# W64-REAL16: v_or_b16_e64_dpp v5.l, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +# W64-FAKE16: v_or_b16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] 0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff -# GFX12: v_or_b16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +# W32-REAL16: v_or_b16_e64_dpp v5.l, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +# W32-FAKE16: v_or_b16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +# W64-REAL16: v_or_b16_e64_dpp v5.l, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +# W64-FAKE16: v_or_b16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] 0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff -# GFX12: v_or_b16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +# W32-REAL16: v_or_b16_e64_dpp v5.l, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +# W32-FAKE16: v_or_b16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +# W64-REAL16: v_or_b16_e64_dpp v5.l, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +# W64-FAKE16: v_or_b16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] 0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff -# GFX12: v_or_b16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +# W32-REAL16: v_or_b16_e64_dpp v5.l, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +# W32-FAKE16: v_or_b16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +# W64-REAL16: v_or_b16_e64_dpp v5.l, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +# W64-FAKE16: v_or_b16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] 0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff -# GFX12: v_or_b16_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +# W32-REAL16: v_or_b16_e64_dpp v5.l, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +# W32-FAKE16: v_or_b16_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +# W64-REAL16: v_or_b16_e64_dpp v5.l, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +# W64-FAKE16: v_or_b16_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] 0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 -# GFX12: v_or_b16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] +# W32-REAL16: v_or_b16_e64_dpp v5.l, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: 
[0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] +# W32-FAKE16: v_or_b16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] +# W64-REAL16: v_or_b16_e64_dpp v5.l, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] +# W64-FAKE16: v_or_b16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] 0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 -# GFX12: v_or_b16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x63,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] +# W32-REAL16: v_or_b16_e64_dpp v255.l, v255.l, v255.l row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x63,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] +# W32-FAKE16: v_or_b16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x63,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] +# W64-REAL16: v_or_b16_e64_dpp v255.l, v255.l, v255.l row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x63,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] +# W64-FAKE16: v_or_b16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x63,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] 0xff,0x00,0x63,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 # GFX12: v_perm_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x44,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] @@ -3392,46 +3479,88 @@ # GFX12: v_xor3_b32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x40,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] 0xff,0x00,0x40,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 -# GFX12: v_xor_b16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +# W32-REAL16: v_xor_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +# W32-FAKE16: v_xor_b16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +# W64-REAL16: v_xor_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +# W64-FAKE16: v_xor_b16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] 0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff -# GFX12: v_xor_b16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +# W32-REAL16: v_xor_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +# W32-FAKE16: v_xor_b16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +# W64-REAL16: v_xor_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +# W64-FAKE16: v_xor_b16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] 0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff -# GFX12: v_xor_b16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +# W32-REAL16: v_xor_b16_e64_dpp v5.l, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +# W32-FAKE16: v_xor_b16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +# W64-REAL16: v_xor_b16_e64_dpp v5.l, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +# W64-FAKE16: v_xor_b16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] 0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff -# GFX12: v_xor_b16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +# W32-REAL16: v_xor_b16_e64_dpp v5.l, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +# W32-FAKE16: v_xor_b16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +# W64-REAL16: v_xor_b16_e64_dpp v5.l, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +# W64-FAKE16: v_xor_b16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] 0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff -# GFX12: v_xor_b16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +# W32-REAL16: v_xor_b16_e64_dpp v5.l, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +# W32-FAKE16: v_xor_b16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +# W64-REAL16: v_xor_b16_e64_dpp v5.l, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +# W64-FAKE16: v_xor_b16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] 0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff -# GFX12: v_xor_b16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +# W32-REAL16: v_xor_b16_e64_dpp v5.l, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +# W32-FAKE16: v_xor_b16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +# W64-REAL16: v_xor_b16_e64_dpp v5.l, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +# W64-FAKE16: v_xor_b16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf 
bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] 0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff -# GFX12: v_xor_b16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +# W32-REAL16: v_xor_b16_e64_dpp v5.l, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +# W32-FAKE16: v_xor_b16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +# W64-REAL16: v_xor_b16_e64_dpp v5.l, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +# W64-FAKE16: v_xor_b16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] 0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff -# GFX12: v_xor_b16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +# W32-REAL16: v_xor_b16_e64_dpp v5.l, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +# W32-FAKE16: v_xor_b16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +# W64-REAL16: v_xor_b16_e64_dpp v5.l, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +# W64-FAKE16: v_xor_b16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] 0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff -# GFX12: v_xor_b16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +# W32-REAL16: v_xor_b16_e64_dpp v5.l, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +# W32-FAKE16: v_xor_b16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +# W64-REAL16: v_xor_b16_e64_dpp v5.l, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +# W64-FAKE16: v_xor_b16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] 0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff -# GFX12: v_xor_b16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +# W32-REAL16: v_xor_b16_e64_dpp v5.l, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +# W32-FAKE16: v_xor_b16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +# W64-REAL16: v_xor_b16_e64_dpp v5.l, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +# W64-FAKE16: v_xor_b16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] 0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff -# GFX12: v_xor_b16_e64_dpp 
v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +# W32-REAL16: v_xor_b16_e64_dpp v5.l, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +# W32-FAKE16: v_xor_b16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +# W64-REAL16: v_xor_b16_e64_dpp v5.l, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +# W64-FAKE16: v_xor_b16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] 0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff -# GFX12: v_xor_b16_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +# W32-REAL16: v_xor_b16_e64_dpp v5.l, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +# W32-FAKE16: v_xor_b16_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +# W64-REAL16: v_xor_b16_e64_dpp v5.l, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +# W64-FAKE16: v_xor_b16_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] 0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 -# GFX12: v_xor_b16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] +# W32-REAL16: v_xor_b16_e64_dpp v5.l, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] +# W32-FAKE16: v_xor_b16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] +# W64-REAL16: v_xor_b16_e64_dpp v5.l, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] +# W64-FAKE16: v_xor_b16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] 0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 -# GFX12: v_xor_b16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x64,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] +# W32-REAL16: v_xor_b16_e64_dpp v255.l, v255.l, v255.l row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x64,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] +# W32-FAKE16: v_xor_b16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x64,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] +# W64-REAL16: v_xor_b16_e64_dpp v255.l, v255.l, v255.l row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x64,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] +# W64-FAKE16: v_xor_b16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x64,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] 0xff,0x00,0x64,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 # GFX12: v_add_nc_i16_e64_dpp v5, v1, v2 
quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_dpp8.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_dpp8.txt index f291795c8a627..47611e0b9708f 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_dpp8.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_dpp8.txt @@ -1,5 +1,8 @@ -# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX12,W32 %s -# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX12,W64 %s +; NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --version 5 +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX12,W32,W32-REAL16 %s +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=-real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX12,W32,W32-FAKE16 %s +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,+real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX12,W64,W64-REAL16 %s +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,-real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX12,W64,W64-FAKE16 %s # GFX12: v_add3_u32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x55,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] 0x05,0x00,0x55,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 @@ -179,10 +182,16 @@ # GFX12: v_alignbyte_b32_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x17,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] 0xff,0x00,0x17,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00 -# GFX12: v_and_b16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x62,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W32-REAL16: v_and_b16_e64_dpp v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x62,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_and_b16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x62,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W64-REAL16: v_and_b16_e64_dpp v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x62,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_and_b16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x62,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x05,0x00,0x62,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 -# GFX12: v_and_b16_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x62,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] +# W32-REAL16: v_and_b16_e64_dpp v255.l, v255.l, v255.l dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x62,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] +# W32-FAKE16: v_and_b16_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x62,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] +# W64-REAL16: v_and_b16_e64_dpp v255.l, v255.l, v255.l dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x62,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] +# W64-FAKE16: v_and_b16_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x62,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] 0xff,0x00,0x62,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00 # GFX12: v_and_or_b32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; 
encoding: [0x05,0x00,0x57,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] @@ -1779,10 +1788,16 @@ # GFX12: v_or3_b32_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x58,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] 0xff,0x00,0x58,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00 -# GFX12: v_or_b16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x63,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W32-REAL16: v_or_b16_e64_dpp v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x63,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_or_b16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x63,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W64-REAL16: v_or_b16_e64_dpp v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x63,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_or_b16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x63,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x05,0x00,0x63,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 -# GFX12: v_or_b16_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x63,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] +# W32-REAL16: v_or_b16_e64_dpp v255.l, v255.l, v255.l dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x63,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] +# W32-FAKE16: v_or_b16_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x63,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] +# W64-REAL16: v_or_b16_e64_dpp v255.l, v255.l, v255.l dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x63,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] +# W64-FAKE16: v_or_b16_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x63,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] 0xff,0x00,0x63,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00 # GFX12: v_perm_b32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x44,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] @@ -2102,10 +2117,16 @@ # GFX12: v_xor3_b32_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x40,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] 0xff,0x00,0x40,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00 -# GFX12: v_xor_b16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x64,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W32-REAL16: v_xor_b16_e64_dpp v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x64,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_xor_b16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x64,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W64-REAL16: v_xor_b16_e64_dpp v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x64,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_xor_b16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x64,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x05,0x00,0x64,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 -# GFX12: v_xor_b16_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x64,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] +# W32-REAL16: v_xor_b16_e64_dpp v255.l, v255.l, v255.l dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x64,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] +# W32-FAKE16: v_xor_b16_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x64,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] +# W64-REAL16: v_xor_b16_e64_dpp v255.l, 
v255.l, v255.l dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x64,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
+# W64-FAKE16: v_xor_b16_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x64,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
0xff,0x00,0x64,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00

# GFX12: v_add_nc_i16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]

From ec31f76df11d624699a7b2d4d9da052b4cc47452 Mon Sep 17 00:00:00 2001
From: Rahul Joshi
Date: Tue, 24 Sep 2024 12:24:17 -0700
Subject: [PATCH 038/212] [NFC] Fix line endings for OptionStrCmp.h and .td test files (#109806)

Fix line endings for these files to Unix style.
---
 llvm/include/llvm/Support/OptionStrCmp.h | 64 ++++++++++++------------
 llvm/test/TableGen/listflatten-error.td  | 12 ++---
 llvm/test/TableGen/listflatten.td        | 64 ++++++++++++------------
 3 files changed, 70 insertions(+), 70 deletions(-)

diff --git a/llvm/include/llvm/Support/OptionStrCmp.h b/llvm/include/llvm/Support/OptionStrCmp.h
index d417fe675e292..f3d3c2adb902f 100644
--- a/llvm/include/llvm/Support/OptionStrCmp.h
+++ b/llvm/include/llvm/Support/OptionStrCmp.h
@@ -1,32 +1,32 @@
-//===- OptionStrCmp.h - Option String Comparison ----------------*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_SUPPORT_OPTIONSTRCMP_H
-#define LLVM_SUPPORT_OPTIONSTRCMP_H
-
-#include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/StringRef.h"
-
-namespace llvm {
-
-// Comparison function for Option strings (option names & prefixes).
-// The ordering is *almost* case-insensitive lexicographic, with an exception.
-// '\0' comes at the end of the alphabet instead of the beginning (thus options
-// precede any other options which prefix them). Additionally, if two options
-// are identical ignoring case, they are ordered according to case sensitive
-// ordering if `FallbackCaseSensitive` is true.
-int StrCmpOptionName(StringRef A, StringRef B,
-                     bool FallbackCaseSensitive = true);
-
-// Comparison function for Option prefixes.
-int StrCmpOptionPrefixes(ArrayRef<StringRef> APrefixes,
-                         ArrayRef<StringRef> BPrefixes);
-
-} // namespace llvm
-
-#endif // LLVM_SUPPORT_OPTIONSTRCMP_H
+//===- OptionStrCmp.h - Option String Comparison ----------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SUPPORT_OPTIONSTRCMP_H
+#define LLVM_SUPPORT_OPTIONSTRCMP_H
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/StringRef.h"
+
+namespace llvm {
+
+// Comparison function for Option strings (option names & prefixes).
+// The ordering is *almost* case-insensitive lexicographic, with an exception.
+// '\0' comes at the end of the alphabet instead of the beginning (thus options
+// precede any other options which prefix them). Additionally, if two options
+// are identical ignoring case, they are ordered according to case sensitive
+// ordering if `FallbackCaseSensitive` is true.
+int StrCmpOptionName(StringRef A, StringRef B,
+                     bool FallbackCaseSensitive = true);
+
+// Comparison function for Option prefixes.
+int StrCmpOptionPrefixes(ArrayRef<StringRef> APrefixes,
+                         ArrayRef<StringRef> BPrefixes);
+
+} // namespace llvm
+
+#endif // LLVM_SUPPORT_OPTIONSTRCMP_H
diff --git a/llvm/test/TableGen/listflatten-error.td b/llvm/test/TableGen/listflatten-error.td
index 56062420982a1..2f13356b6792f 100644
--- a/llvm/test/TableGen/listflatten-error.td
+++ b/llvm/test/TableGen/listflatten-error.td
@@ -1,6 +1,6 @@
-// RUN: not llvm-tblgen %s 2>&1 | FileCheck %s -DFILE=%s
-
-// CHECK: [[FILE]]:[[@LINE+2]]:33: error: expected list type argument in unary operator
-class Flatten<int A> {
-  list<int> F = !listflatten(A);
-}
+// RUN: not llvm-tblgen %s 2>&1 | FileCheck %s -DFILE=%s
+
+// CHECK: [[FILE]]:[[@LINE+2]]:33: error: expected list type argument in unary operator
+class Flatten<int A> {
+  list<int> F = !listflatten(A);
+}
diff --git a/llvm/test/TableGen/listflatten.td b/llvm/test/TableGen/listflatten.td
index bc9b1c71ea88d..a76ac21c4ad84 100644
--- a/llvm/test/TableGen/listflatten.td
+++ b/llvm/test/TableGen/listflatten.td
@@ -1,32 +1,32 @@
-// RUN: llvm-tblgen %s | FileCheck %s
-
-class Flatten<list<int> A, list<int> B> {
-  list<int> Flat1 = !listflatten([A, B, [6], [7, 8]]);
-
-  list<list<int>> X = [A, B];
-  list<int> Flat2 = !listflatten(!listconcat(X, [[7]]));
-
-  // Generate a nested list of integers.
-  list<int> Y0 = [1, 2, 3, 4];
-  list<list<int>> Y1 = !foreach(elem, Y0, [elem]);
-  list<list<list<int>>> Y2 = !foreach(elem, Y1, [elem]);
-  list<list<list<list<int>>>> Y3 = !foreach(elem, Y2, [elem]);
-
-  // Flatten it completely.
-  list<int> Flat3=!listflatten(!listflatten(!listflatten(Y3)));
-
-  // Flatten it partially.
-  list<list<list<int>>> Flat4 = !listflatten(Y3);
-  list<list<int>> Flat5 = !listflatten(!listflatten(Y3));
-
-  // Test NOP flattening.
-  list<string> Flat6 = !listflatten(["a", "b"]);
-}
-
-// CHECK: list<int> Flat1 = [1, 2, 3, 4, 5, 6, 7, 8];
-// CHECK: list<int> Flat2 = [1, 2, 3, 4, 5, 7];
-// CHECK: list<int> Flat3 = [1, 2, 3, 4];
-// CHECK{LITERAL}: list<list<list<int>>> Flat4 = [[[1]], [[2]], [[3]], [[4]]];
-// CHECK: list<string> Flat6 = ["a", "b"];
-def F : Flatten<[1,2], [3,4,5]>;
-
+// RUN: llvm-tblgen %s | FileCheck %s
+
+class Flatten<list<int> A, list<int> B> {
+  list<int> Flat1 = !listflatten([A, B, [6], [7, 8]]);
+
+  list<list<int>> X = [A, B];
+  list<int> Flat2 = !listflatten(!listconcat(X, [[7]]));
+
+  // Generate a nested list of integers.
+  list<int> Y0 = [1, 2, 3, 4];
+  list<list<int>> Y1 = !foreach(elem, Y0, [elem]);
+  list<list<list<int>>> Y2 = !foreach(elem, Y1, [elem]);
+  list<list<list<list<int>>>> Y3 = !foreach(elem, Y2, [elem]);
+
+  // Flatten it completely.
+  list<int> Flat3=!listflatten(!listflatten(!listflatten(Y3)));
+
+  // Flatten it partially.
+  list<list<list<int>>> Flat4 = !listflatten(Y3);
+  list<list<int>> Flat5 = !listflatten(!listflatten(Y3));
+
+  // Test NOP flattening.
+  list<string> Flat6 = !listflatten(["a", "b"]);
+}
+
+// CHECK: list<int> Flat1 = [1, 2, 3, 4, 5, 6, 7, 8];
+// CHECK: list<int> Flat2 = [1, 2, 3, 4, 5, 7];
+// CHECK: list<int> Flat3 = [1, 2, 3, 4];
+// CHECK{LITERAL}: list<list<list<int>>> Flat4 = [[[1]], [[2]], [[3]], [[4]]];
+// CHECK: list<string> Flat6 = ["a", "b"];
+def F : Flatten<[1,2], [3,4,5]>;
+

From 38371a1855dd891fdd0a6cf437e7c25b80f36dfe Mon Sep 17 00:00:00 2001
From: Chris Apple
Date: Tue, 24 Sep 2024 13:08:33 -0700
Subject: [PATCH 039/212] [rtsan][NFC] Make Uninitialized state explicit (#109856)

Follow-on to #109830

There should be no functional change, as enums start at 0 anyway. This
just makes the code more readable and prevents any future bugs.
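
To make the pattern concrete, here is a minimal standalone sketch; it is
not the rtsan sources themselves: std::atomic<uint8_t> and uint8_t stand
in for the runtime's atomic_uint8_t and u8, and the Initializing and
Initialized enumerators are assumed here for illustration.

  #include <atomic>
  #include <cstdint>

  enum class InitializationState : uint8_t {
    Uninitialized, // first enumerator, so implicitly 0
    Initializing,  // assumed for illustration
    Initialized,   // assumed for illustration
  };

  // Before: correct only because Uninitialized happens to be enumerator 0.
  static std::atomic<uint8_t> before{0};

  // After: the initial state is named, so reordering or extending the
  // enum can no longer silently change what the initializer means.
  static std::atomic<uint8_t> after{
      static_cast<uint8_t>(InitializationState::Uninitialized)};

  // Both variables hold the same value; only the intent is now spelled
  // out at the initialization site.
  int main() { return after.load() == before.load() ? 0 : 1; }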
---
 compiler-rt/lib/rtsan/rtsan.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/compiler-rt/lib/rtsan/rtsan.cpp b/compiler-rt/lib/rtsan/rtsan.cpp
index e6d2481b2c2a3..84e4b8fae1e2f 100644
--- a/compiler-rt/lib/rtsan/rtsan.cpp
+++ b/compiler-rt/lib/rtsan/rtsan.cpp
@@ -31,7 +31,8 @@ enum class InitializationState : u8 {
 } // namespace
 
 static StaticSpinMutex rtsan_inited_mutex;
-static atomic_uint8_t rtsan_initialized = {0};
+static atomic_uint8_t rtsan_initialized = {
+    static_cast<u8>(InitializationState::Uninitialized)};
 
 static void SetInitializationState(InitializationState state) {
   atomic_store(&rtsan_initialized, static_cast<u8>(state),

From 491123562a7597f8b43e317a6481c69e4d15c4e7 Mon Sep 17 00:00:00 2001
From: Mircea Trofin
Date: Tue, 24 Sep 2024 13:18:21 -0700
Subject: [PATCH 040/212] [ctx_prof] Automatically convert available external
 linkage to local for modules with contextual roots (#109203)

For the modules containing context roots, the way IPO happens will
potentially result in imported functions that are differently
specialized (even if themselves not inlined) than their originals. So
we want to convert them to local rather than elide them.

Eventually we'd perform this as a ThinLTO directive.
---
 llvm/lib/Transforms/IPO/ElimAvailExtern.cpp   | 19 ++++++++++++++-----
 .../transform-to-local.ll                     | 18 ++++++++++++++++--
 2 files changed, 30 insertions(+), 7 deletions(-)

diff --git a/llvm/lib/Transforms/IPO/ElimAvailExtern.cpp b/llvm/lib/Transforms/IPO/ElimAvailExtern.cpp
index 2b34d3b5a56ea..d3d27de4218c8 100644
--- a/llvm/lib/Transforms/IPO/ElimAvailExtern.cpp
+++ b/llvm/lib/Transforms/IPO/ElimAvailExtern.cpp
@@ -14,6 +14,7 @@
 #include "llvm/Transforms/IPO/ElimAvailExtern.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/CtxProfAnalysis.h"
 #include "llvm/IR/Constant.h"
 #include "llvm/IR/DebugInfoMetadata.h"
 #include "llvm/IR/Function.h"
@@ -88,7 +89,7 @@ static void convertToLocalCopy(Module &M, Function &F) {
   ++NumConversions;
 }
 
-static bool eliminateAvailableExternally(Module &M) {
+static bool eliminateAvailableExternally(Module &M, bool Convert) {
   bool Changed = false;
 
   // Drop initializers of available externally global variables.
@@ -112,7 +113,7 @@ static bool eliminateAvailableExternally(Module &M) {
     if (F.isDeclaration() || !F.hasAvailableExternallyLinkage())
       continue;
 
-    if (ConvertToLocal)
+    if (Convert || ConvertToLocal)
       convertToLocalCopy(M, F);
     else
       deleteFunction(F);
@@ -125,8 +126,15 @@ static bool eliminateAvailableExternally(Module &M) {
 }
 
 PreservedAnalyses
-EliminateAvailableExternallyPass::run(Module &M, ModuleAnalysisManager &) {
-  if (!eliminateAvailableExternally(M))
-    return PreservedAnalyses::all();
+EliminateAvailableExternallyPass::run(Module &M, ModuleAnalysisManager &MAM) {
+  auto *CtxProf = MAM.getCachedResult<CtxProfAnalysis>(M);
+  // Convert to local instead of eliding if we use contextual profiling in this
+  // module. This is because the IPO decisions performed with contextual
+  // information will likely differ from decisions made without. For a function
+  // that's imported, its optimizations will, thus, differ, and be specialized
+  // for this contextual information. Eliding it in favor of the original would
+  // undo these optimizations.
+  if (!eliminateAvailableExternally(M, /*Convert=*/(CtxProf && !!(*CtxProf))))
+    return PreservedAnalyses::all();
   return PreservedAnalyses::none();
 }
diff --git a/llvm/test/Transforms/EliminateAvailableExternally/transform-to-local.ll b/llvm/test/Transforms/EliminateAvailableExternally/transform-to-local.ll
index 786cc260d331c..4908fba62e3bf 100644
--- a/llvm/test/Transforms/EliminateAvailableExternally/transform-to-local.ll
+++ b/llvm/test/Transforms/EliminateAvailableExternally/transform-to-local.ll
@@ -1,6 +1,16 @@
 ; REQUIRES: asserts
 ; RUN: opt -passes=elim-avail-extern -avail-extern-to-local -stats -S 2>&1 < %s | FileCheck %s
+;
+; RUN: echo '[{"Guid":1234, "Counters": [1]}]' | llvm-ctxprof-util fromJSON --input=- --output=%t_profile.ctxprofdata
+;
+; Because we pass a contextual profile with a root defined in this module, we expect the outcome to be the same as-if
+; we passed -avail-extern-to-local, i.e. available_externally don't get elided and instead get converted to local linkage
+; RUN: opt -passes='assign-guid,require<ctx-prof-analysis>,elim-avail-extern' -use-ctx-profile=%t_profile.ctxprofdata -stats -S 2>&1 < %s | FileCheck %s
+; If the profile doesn't apply to this module, available_externally won't get converted to internal linkage, and will be
+; removed instead.
+; RUN: echo '[{"Guid":5678, "Counters": [1]}]' | llvm-ctxprof-util fromJSON --input=- --output=%t_profile_bad.ctxprofdata
+; RUN: opt -passes='assign-guid,require<ctx-prof-analysis>,elim-avail-extern' -use-ctx-profile=%t_profile_bad.ctxprofdata -stats -S 2>&1 < %s | FileCheck %s --check-prefix=NOOP
 
 declare void @call_out(ptr %fct)
 
@@ -12,13 +22,15 @@ define available_externally hidden void @g() {
   ret void
 }
 
-define void @hello(ptr %g) {
+define void @hello(ptr %g) !guid !0 {
   call void @f()
   %f = load ptr, ptr @f
   call void @call_out(ptr %f)
   ret void
 }
 
+!0 = !{i64 1234}
+
 ; CHECK: define internal void @f.__uniq.{{[0-9|a-f]*}}()
 ; CHECK: declare hidden void @g()
 ; CHECK: call void @f.__uniq.{{[0-9|a-f]*}}()
@@ -26,4 +38,6 @@ define void @hello(ptr %g) {
 ; CHECK-NEXT: call void @call_out(ptr %f)
 ; CHECK: Statistics Collected
 ; CHECK: 1 elim-avail-extern - Number of functions converted
-; CHECK: 1 elim-avail-extern - Number of functions removed
\ No newline at end of file
+; CHECK: 1 elim-avail-extern - Number of functions removed
+
+; NOOP: 2 elim-avail-extern - Number of functions removed
\ No newline at end of file

From cf9fc5e22fa51e5b3a8182e7ef408cd77064a664 Mon Sep 17 00:00:00 2001
From: c8ef
Date: Wed, 25 Sep 2024 04:30:31 +0800
Subject: [PATCH 041/212] [CMake] enable CMP0147 policy if available (#109150)

Closes #38383.

This enables parallel custom build commands, which improve compilation
time on Windows with Visual Studio.
---
 cmake/Modules/CMakePolicy.cmake | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/cmake/Modules/CMakePolicy.cmake b/cmake/Modules/CMakePolicy.cmake
index b6962668cb09a..665af01d43bd2 100644
--- a/cmake/Modules/CMakePolicy.cmake
+++ b/cmake/Modules/CMakePolicy.cmake
@@ -29,3 +29,9 @@ endif()
 if(POLICY CMP0144)
   cmake_policy(SET CMP0144 NEW)
 endif()
+
+# CMP0147: Visual Studio Generators build custom commands in parallel.
+# New in CMake 3.27: https://cmake.org/cmake/help/latest/policy/CMP0147.html +if(POLICY CMP0147) + cmake_policy(SET CMP0147 NEW) +endif() From b4130bee6bfd34d8045f02fc9f951bcb5db9d85c Mon Sep 17 00:00:00 2001 From: Zentrik Date: Tue, 24 Sep 2024 21:33:57 +0100 Subject: [PATCH 042/212] Fix libFuzzer not building with pthreads on Windows (#109525) Fixes https://github.com/llvm/llvm-project/issues/106871 --- compiler-rt/lib/fuzzer/FuzzerUtilWindows.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/compiler-rt/lib/fuzzer/FuzzerUtilWindows.cpp b/compiler-rt/lib/fuzzer/FuzzerUtilWindows.cpp index e0210aa0ac365..37aecae7237ae 100644 --- a/compiler-rt/lib/fuzzer/FuzzerUtilWindows.cpp +++ b/compiler-rt/lib/fuzzer/FuzzerUtilWindows.cpp @@ -239,6 +239,10 @@ size_t PageSize() { } void SetThreadName(std::thread &thread, const std::string &name) { +#if defined(_LIBCPP_HAS_THREAD_API_PTHREAD) || \ + defined(_GLIBCXX_GCC_GTHR_POSIX_H) + (void)pthread_setname_np(thread.native_handle(), name.c_str()); +#else typedef HRESULT(WINAPI * proc)(HANDLE, PCWSTR); HMODULE kbase = GetModuleHandleA("KernelBase.dll"); proc ThreadNameProc = reinterpret_cast( @@ -253,6 +257,7 @@ void SetThreadName(std::thread &thread, const std::string &name) { } } } +#endif } } // namespace fuzzer From 538dbb97a215d4f203cb1e88455bd10a1ecd7e7d Mon Sep 17 00:00:00 2001 From: Alexey Bataev Date: Tue, 24 Sep 2024 13:27:36 -0700 Subject: [PATCH 043/212] [SLP][NFC]Add a test with the operand reordering and bad reused values decision --- .../X86/splat-score-adjustment.ll | 95 +++++++++++++++++++ 1 file changed, 95 insertions(+) create mode 100644 llvm/test/Transforms/SLPVectorizer/X86/splat-score-adjustment.ll diff --git a/llvm/test/Transforms/SLPVectorizer/X86/splat-score-adjustment.ll b/llvm/test/Transforms/SLPVectorizer/X86/splat-score-adjustment.ll new file mode 100644 index 0000000000000..9691cb7537a70 --- /dev/null +++ b/llvm/test/Transforms/SLPVectorizer/X86/splat-score-adjustment.ll @@ -0,0 +1,95 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -S --passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu -mcpu=core-avx2 < %s | FileCheck %s + +define i32 @a() { +; CHECK-LABEL: define i32 @a( +; CHECK-SAME: ) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: br label %[[BB1:.*]] +; CHECK: [[BB1]]: +; CHECK-NEXT: [[TMP2:%.*]] = phi i8 [ 0, [[TMP0:%.*]] ], [ [[TMP7:%.*]], %[[BB1]] ] +; CHECK-NEXT: [[TMP3:%.*]] = phi i8 [ 0, [[TMP0]] ], [ [[TMP8:%.*]], %[[BB1]] ] +; CHECK-NEXT: [[TMP4:%.*]] = phi <4 x i8> [ zeroinitializer, [[TMP0]] ], [ [[TMP6:%.*]], %[[BB1]] ] +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x i8> [[TMP4]], <4 x i8> poison, <8 x i32> +; CHECK-NEXT: [[TMP6]] = load <4 x i8>, ptr null, align 4 +; CHECK-NEXT: [[TMP7]] = extractelement <4 x i8> [[TMP6]], i32 3 +; CHECK-NEXT: [[TMP8]] = extractelement <4 x i8> [[TMP6]], i32 2 +; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i8> [[TMP6]], i32 0 +; CHECK-NEXT: [[TMP10:%.*]] = xor i8 [[TMP9]], [[TMP3]] +; CHECK-NEXT: [[TMP11:%.*]] = extractelement <4 x i8> [[TMP6]], i32 1 +; CHECK-NEXT: [[TMP12:%.*]] = xor i8 [[TMP11]], [[TMP2]] +; CHECK-NEXT: [[TMP13:%.*]] = xor i8 [[TMP8]], [[TMP9]] +; CHECK-NEXT: [[TMP14:%.*]] = xor i8 [[TMP7]], [[TMP11]] +; CHECK-NEXT: [[TMP15:%.*]] = shufflevector <4 x i8> [[TMP6]], <4 x i8> poison, <8 x i32> +; CHECK-NEXT: [[TMP16:%.*]] = insertelement <8 x i8> [[TMP15]], i8 [[TMP10]], i32 0 +; CHECK-NEXT: [[TMP17:%.*]] = insertelement <8 x i8> [[TMP16]], i8 [[TMP12]], i32 2 +; CHECK-NEXT: 
[[TMP18:%.*]] = insertelement <8 x i8> [[TMP17]], i8 [[TMP13]], i32 4 +; CHECK-NEXT: [[TMP19:%.*]] = shufflevector <4 x i8> [[TMP4]], <4 x i8> poison, <8 x i32> +; CHECK-NEXT: [[TMP20:%.*]] = shufflevector <8 x i8> [[TMP19]], <8 x i8> [[TMP18]], <8 x i32> +; CHECK-NEXT: [[TMP21:%.*]] = insertelement <8 x i8> [[TMP20]], i8 [[TMP14]], i32 6 +; CHECK-NEXT: [[TMP22:%.*]] = xor <8 x i8> [[TMP18]], [[TMP21]] +; CHECK-NEXT: [[TMP23:%.*]] = xor <8 x i8> [[TMP22]], [[TMP5]] +; CHECK-NEXT: store <8 x i8> [[TMP23]], ptr null, align 4 +; CHECK-NEXT: br label %[[BB1]] +; + br label %1 + +1: + %2 = phi i8 [ 0, %0 ], [ %40, %1 ] + %3 = phi i8 [ 0, %0 ], [ %28, %1 ] + %4 = phi i8 [ 0, %0 ], [ %16, %1 ] + %5 = phi i8 [ 0, %0 ], [ %6, %1 ] + %6 = load i8, ptr null, align 4 + %7 = xor i8 %6, %3 + %8 = xor i8 %7, %4 + %9 = xor i8 %8, %5 + store i8 %9, ptr null, align 4 + %10 = xor i8 %6, %2 + %11 = xor i8 %10, %5 + %12 = add i64 0, 1 + %13 = getelementptr i8, ptr null, i64 %12 + store i8 %11, ptr %13, align 1 + %14 = add i64 0, 1 + %15 = getelementptr i8, ptr null, i64 %14 + %16 = load i8, ptr %15, align 1 + %17 = xor i8 %16, %2 + %18 = xor i8 %17, %3 + %19 = xor i8 %18, %4 + %20 = add i64 0, 2 + %21 = getelementptr i8, ptr null, i64 %20 + store i8 %19, ptr %21, align 2 + %22 = xor i8 %16, %6 + %23 = xor i8 %22, %4 + %24 = add i64 0, 3 + %25 = getelementptr i8, ptr null, i64 %24 + store i8 %23, ptr %25, align 1 + %26 = add i64 0, 2 + %27 = getelementptr i8, ptr null, i64 %26 + %28 = load i8, ptr %27, align 2 + %29 = xor i8 %28, %6 + %30 = xor i8 %29, %2 + %31 = xor i8 %30, %3 + %32 = add i64 0, 4 + %33 = getelementptr i8, ptr null, i64 %32 + store i8 %31, ptr %33, align 4 + %34 = xor i8 %28, %16 + %35 = xor i8 %34, %3 + %36 = add i64 0, 5 + %37 = getelementptr i8, ptr null, i64 %36 + store i8 %35, ptr %37, align 1 + %38 = add i64 0, 3 + %39 = getelementptr i8, ptr null, i64 %38 + %40 = load i8, ptr %39, align 1 + %41 = xor i8 %40, %16 + %42 = xor i8 %41, %6 + %43 = xor i8 %42, %2 + %44 = add i64 0, 6 + %45 = getelementptr i8, ptr null, i64 %44 + store i8 %43, ptr %45, align 2 + %46 = xor i8 %40, %28 + %47 = xor i8 %46, %2 + %48 = add i64 0, 7 + %49 = getelementptr i8, ptr null, i64 %48 + store i8 %47, ptr %49, align 1 + br label %1 +} + From b9bd8ca24e46c1fd6fa343a248936e395f103765 Mon Sep 17 00:00:00 2001 From: "Miguel A. Arroyo" Date: Tue, 24 Sep 2024 13:57:01 -0700 Subject: [PATCH 044/212] [LLD][COFF] Adds `/includeglob` flag (#109721) This implements parity with the `--undefined-glob` flag on [ELF](https://reviews.llvm.org/D63244), but for COFF. 
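
For illustration, a hypothetical invocation (the object, library, and glob
names are invented for this sketch):

```
# Register every symbol matching the glob as an undefined reference up
# front, forcing the archive members that define them into the link:
lld-link /out:app.exe /entry:main main.obj helpers.lib /includeglob:init_*
```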
---
 lld/COFF/Driver.cpp                 | 23 ++++++++++++
 lld/COFF/Driver.h                   |  2 ++
 lld/COFF/Options.td                 |  3 ++
 lld/docs/ReleaseNotes.rst           |  1 +
 lld/test/COFF/Inputs/include1d.yaml | 29 ++++++++++++++++
 lld/test/COFF/include.test          | 54 +++++++++++++++++++++++++++++
 lld/test/COFF/include2.test         | 10 ++++--
 7 files changed, 120 insertions(+), 2 deletions(-)
 create mode 100644 lld/test/COFF/Inputs/include1d.yaml

diff --git a/lld/COFF/Driver.cpp b/lld/COFF/Driver.cpp
index f66fe3cab5a2f..5a6a4a61030e6 100644
--- a/lld/COFF/Driver.cpp
+++ b/lld/COFF/Driver.cpp
@@ -37,6 +37,7 @@
 #include "llvm/Support/BinaryStreamReader.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/Support/GlobPattern.h"
 #include "llvm/Support/LEB128.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/Parallel.h"
@@ -704,6 +705,24 @@ Symbol *LinkerDriver::addUndefined(StringRef name) {
   return b;
 }
 
+void LinkerDriver::addUndefinedGlob(StringRef arg) {
+  Expected<GlobPattern> pat = GlobPattern::create(arg);
+  if (!pat) {
+    error("/includeglob: " + toString(pat.takeError()));
+    return;
+  }
+
+  SmallVector<Symbol *, 0> syms;
+  ctx.symtab.forEachSymbol([&syms, &pat](Symbol *sym) {
+    if (pat->match(sym->getName())) {
+      syms.push_back(sym);
+    }
+  });
+
+  for (Symbol *sym : syms)
+    addUndefined(sym->getName());
+}
+
 StringRef LinkerDriver::mangleMaybe(Symbol *s) {
   // If the plain symbol name has already been resolved, do nothing.
   Undefined *unmangled = dyn_cast<Undefined>(s);
@@ -2524,6 +2543,10 @@ void LinkerDriver::linkerMain(ArrayRef<const char *> argsArr) {
     } while (run());
   }
 
+  // Handle /includeglob
+  for (StringRef pat : args::getStrings(args, OPT_incl_glob))
+    addUndefinedGlob(pat);
+
   // Create wrapped symbols for -wrap option.
   std::vector<WrappedSymbol> wrapped = addWrappedSymbols(ctx, args);
   // Load more object files that might be needed for wrapped symbols.
diff --git a/lld/COFF/Driver.h b/lld/COFF/Driver.h
index 0c195a7cc3148..58a2ed2310624 100644
--- a/lld/COFF/Driver.h
+++ b/lld/COFF/Driver.h
@@ -172,6 +172,8 @@ class LinkerDriver {
 
   Symbol *addUndefined(StringRef sym);
 
+  void addUndefinedGlob(StringRef arg);
+
   StringRef mangleMaybe(Symbol *s);
 
   // Windows specific -- "main" is not the only main function in Windows.
diff --git a/lld/COFF/Options.td b/lld/COFF/Options.td
index 4bc4d7c4b5a47..7ceb824c068de 100644
--- a/lld/COFF/Options.td
+++ b/lld/COFF/Options.td
@@ -310,6 +310,9 @@ defm build_id: B<"build-id",
     "Generate build ID (always on when generating PDB)",
     "Do not Generate build ID">;
 
+def incl_glob : Joined<["/", "-", "/?", "-?"], "includeglob:">,
+    HelpText<"Force symbol to be added to symbol table as undefined one using a glob pattern">;
+
 // Flags for debugging
 def lldmap : F<"lldmap">;
 def lldmap_file : P_priv<"lldmap">;
diff --git a/lld/docs/ReleaseNotes.rst b/lld/docs/ReleaseNotes.rst
index 6d09de10e7195..6e043773f0037 100644
--- a/lld/docs/ReleaseNotes.rst
+++ b/lld/docs/ReleaseNotes.rst
@@ -41,6 +41,7 @@ Breaking changes
 COFF Improvements
 -----------------
 
+* ``/includeglob`` has been implemented to match the behavior of ``--undefined-glob`` available for ELF.
MinGW Improvements ------------------ diff --git a/lld/test/COFF/Inputs/include1d.yaml b/lld/test/COFF/Inputs/include1d.yaml new file mode 100644 index 0000000000000..d315cc885dd7c --- /dev/null +++ b/lld/test/COFF/Inputs/include1d.yaml @@ -0,0 +1,29 @@ +--- !COFF +header: + Machine: IMAGE_FILE_MACHINE_AMD64 + Characteristics: [] +sections: + - Name: .text + Characteristics: [ IMAGE_SCN_CNT_CODE, IMAGE_SCN_MEM_EXECUTE, IMAGE_SCN_MEM_READ ] + Alignment: 4 + SectionData: B800000000506800000000680000000050E80000000050E800000000 +symbols: + - Name: .text + Value: 0 + SectionNumber: 1 + SimpleType: IMAGE_SYM_TYPE_NULL + ComplexType: IMAGE_SYM_DTYPE_NULL + StorageClass: IMAGE_SYM_CLASS_STATIC + SectionDefinition: + Length: 28 + NumberOfRelocations: 4 + NumberOfLinenumbers: 0 + CheckSum: 0 + Number: 0 + - Name: baz + Value: 0 + SectionNumber: 1 + SimpleType: IMAGE_SYM_TYPE_NULL + ComplexType: IMAGE_SYM_DTYPE_NULL + StorageClass: IMAGE_SYM_CLASS_EXTERNAL +... diff --git a/lld/test/COFF/include.test b/lld/test/COFF/include.test index 8879ee5bd7a61..2a8a8fe4034c4 100644 --- a/lld/test/COFF/include.test +++ b/lld/test/COFF/include.test @@ -9,10 +9,18 @@ # RUN: echo dummy >> %t.log # RUN: FileCheck -check-prefix=CHECK2 %s < %t.log +# RUN: lld-link /out:%t.exe /entry:main %t.obj /verbose /includeglob:"glob_*" >& %t.log +# RUN: echo dummy >> %t.log +# RUN: FileCheck -check-prefix=CHECK3 %s < %t.log + # CHECK1: Discarded unused +# CHECK1: Discarded glob_match1 +# CHECK1: Discarded glob_match2 # CHECK1-NOT: Discarded used # CHECK2-NOT: Discarded unused # CHECK2-NOT: Discarded used +# CHECK3-NOT: Discarded glob_match1 +# CHECK3-NOT: Discarded glob_match2 --- !COFF header: @@ -31,6 +39,14 @@ sections: Characteristics: [ IMAGE_SCN_CNT_CODE, IMAGE_SCN_LNK_COMDAT, IMAGE_SCN_MEM_EXECUTE, IMAGE_SCN_MEM_READ ] Alignment: 4 SectionData: B82A000000C3 + - Name: '.text$mn' + Characteristics: [ IMAGE_SCN_CNT_CODE, IMAGE_SCN_LNK_COMDAT, IMAGE_SCN_MEM_EXECUTE, IMAGE_SCN_MEM_READ ] + Alignment: 4 + SectionData: B82A000000C3 + - Name: '.text$mn' + Characteristics: [ IMAGE_SCN_CNT_CODE, IMAGE_SCN_LNK_COMDAT, IMAGE_SCN_MEM_EXECUTE, IMAGE_SCN_MEM_READ ] + Alignment: 4 + SectionData: B82A000000C3 - Name: .drectve Characteristics: [ IMAGE_SCN_LNK_INFO, IMAGE_SCN_LNK_REMOVE ] Alignment: 1 @@ -75,6 +91,32 @@ symbols: CheckSum: 0 Number: 0 Selection: IMAGE_COMDAT_SELECT_ANY + - Name: '.text$mn' + Value: 0 + SectionNumber: 4 + SimpleType: IMAGE_SYM_TYPE_NULL + ComplexType: IMAGE_SYM_DTYPE_NULL + StorageClass: IMAGE_SYM_CLASS_STATIC + SectionDefinition: + Length: 6 + NumberOfRelocations: 0 + NumberOfLinenumbers: 0 + CheckSum: 0 + Number: 0 + Selection: IMAGE_COMDAT_SELECT_ANY + - Name: '.text$mn' + Value: 0 + SectionNumber: 5 + SimpleType: IMAGE_SYM_TYPE_NULL + ComplexType: IMAGE_SYM_DTYPE_NULL + StorageClass: IMAGE_SYM_CLASS_STATIC + SectionDefinition: + Length: 6 + NumberOfRelocations: 0 + NumberOfLinenumbers: 0 + CheckSum: 0 + Number: 0 + Selection: IMAGE_COMDAT_SELECT_ANY - Name: main Value: 0 SectionNumber: 1 @@ -93,4 +135,16 @@ symbols: SimpleType: IMAGE_SYM_TYPE_NULL ComplexType: IMAGE_SYM_DTYPE_FUNCTION StorageClass: IMAGE_SYM_CLASS_EXTERNAL + - Name: glob_match1 + Value: 0 + SectionNumber: 4 + SimpleType: IMAGE_SYM_TYPE_NULL + ComplexType: IMAGE_SYM_DTYPE_FUNCTION + StorageClass: IMAGE_SYM_CLASS_EXTERNAL + - Name: glob_match2 + Value: 0 + SectionNumber: 5 + SimpleType: IMAGE_SYM_TYPE_NULL + ComplexType: IMAGE_SYM_DTYPE_FUNCTION + StorageClass: IMAGE_SYM_CLASS_EXTERNAL ... 
diff --git a/lld/test/COFF/include2.test b/lld/test/COFF/include2.test index 557de47d9e198..4796512fff93f 100644 --- a/lld/test/COFF/include2.test +++ b/lld/test/COFF/include2.test @@ -1,14 +1,20 @@ # RUN: yaml2obj %p/Inputs/include1a.yaml -o %t1.obj # RUN: yaml2obj %p/Inputs/include1b.yaml -o %t2.obj # RUN: yaml2obj %p/Inputs/include1c.yaml -o %t3.obj -# RUN: rm -f %t2.lib %t3.lib +# RUN: yaml2obj %p/Inputs/include1d.yaml -o %t4.obj +# RUN: rm -f %t2.lib %t3.lib %t4.lib # RUN: llvm-ar cru %t2.lib %t2.obj # RUN: llvm-ar cru %t3.lib %t3.obj -# RUN: lld-link /out:%t.exe /entry:main %t1.obj %t2.lib %t3.lib /verbose >& %t.log +# RUN: llvm-ar cru %t4.lib %t4.obj +# RUN: lld-link /out:%t.exe /entry:main %t1.obj %t2.lib %t3.lib %t4.lib /verbose >& %t.log # RUN: FileCheck %s < %t.log +# RUN: lld-link /out:%t.exe /entry:main %t1.obj %t2.lib %t3.lib %t4.lib /includeglob:baz /verbose >& %t.glob.log +# RUN: FileCheck -check-prefix=GLOB %s < %t.glob.log CHECK: include2.test.tmp1.obj CHECK: include2.test.tmp2.lib CHECK: include2.test.tmp2.lib(include2.test.tmp2.obj) for foo CHECK: include2.test.tmp3.lib CHECK: include2.test.tmp3.lib(include2.test.tmp3.obj) for bar +CHECK-NOT: include2.test.tmp4.lib(include2.test.tmp4.obj) for baz +GLOB: include2.test.tmp4.lib(include2.test.tmp4.obj) for baz From d7dd31e41791d71ad81af9cc4e7a26b26d4cb27e Mon Sep 17 00:00:00 2001 From: Alexey Bataev Date: Tue, 24 Sep 2024 13:56:50 -0700 Subject: [PATCH 045/212] [SLP]Better analysis of the repeated instructions during operands reordering When doing the repeated instructions analysis, better to make the reordering non-profitable, if the number of unique instructions is not power-of-2. In this case better to keep power-of-2 elements as this allows better vectorization. Fixes https://github.com/llvm/llvm-project/issues/109725 --- .../Transforms/Vectorize/SLPVectorizer.cpp | 56 +++++++++++-------- .../X86/splat-score-adjustment.ll | 28 ++++------ 2 files changed, 45 insertions(+), 39 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index b79e964cdb1b6..414c6388c777b 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -1930,30 +1930,38 @@ class BoUpSLP { /// elements in the lane, it will be vectorized with higher probability /// after removing duplicates. Currently the SLP vectorizer supports only /// vectorization of the power-of-2 number of unique scalars. - int getSplatScore(unsigned Lane, unsigned OpIdx, unsigned Idx) const { + int getSplatScore(unsigned Lane, unsigned OpIdx, unsigned Idx, + const SmallBitVector &UsedLanes) const { Value *IdxLaneV = getData(Idx, Lane).V; - if (!isa(IdxLaneV) || IdxLaneV == getData(OpIdx, Lane).V) + if (!isa(IdxLaneV) || IdxLaneV == getData(OpIdx, Lane).V || + isa(IdxLaneV)) return 0; - SmallPtrSet Uniques; - for (unsigned Ln = 0, E = getNumLanes(); Ln < E; ++Ln) { + SmallDenseMap Uniques; + for (unsigned Ln : seq(getNumLanes())) { if (Ln == Lane) continue; Value *OpIdxLnV = getData(OpIdx, Ln).V; if (!isa(OpIdxLnV)) return 0; - Uniques.insert(OpIdxLnV); + Uniques.try_emplace(OpIdxLnV, Ln); } - int UniquesCount = Uniques.size(); - int UniquesCntWithIdxLaneV = - Uniques.contains(IdxLaneV) ? UniquesCount : UniquesCount + 1; + unsigned UniquesCount = Uniques.size(); + auto IdxIt = Uniques.find(IdxLaneV); + unsigned UniquesCntWithIdxLaneV = + IdxIt != Uniques.end() ? 
UniquesCount : UniquesCount + 1; Value *OpIdxLaneV = getData(OpIdx, Lane).V; - int UniquesCntWithOpIdxLaneV = - Uniques.contains(OpIdxLaneV) ? UniquesCount : UniquesCount + 1; + auto OpIdxIt = Uniques.find(OpIdxLaneV); + unsigned UniquesCntWithOpIdxLaneV = + OpIdxIt != Uniques.end() ? UniquesCount : UniquesCount + 1; if (UniquesCntWithIdxLaneV == UniquesCntWithOpIdxLaneV) return 0; - return (PowerOf2Ceil(UniquesCntWithOpIdxLaneV) - - UniquesCntWithOpIdxLaneV) - - (PowerOf2Ceil(UniquesCntWithIdxLaneV) - UniquesCntWithIdxLaneV); + return std::min(bit_ceil(UniquesCntWithOpIdxLaneV) - + UniquesCntWithOpIdxLaneV, + UniquesCntWithOpIdxLaneV - + bit_floor(UniquesCntWithOpIdxLaneV)) - + ((IdxIt != Uniques.end() && UsedLanes.test(IdxIt->second)) + ? UniquesCntWithIdxLaneV - bit_floor(UniquesCntWithIdxLaneV) + : bit_ceil(UniquesCntWithIdxLaneV) - UniquesCntWithIdxLaneV); } /// \param Lane lane of the operands under analysis. @@ -1993,7 +2001,7 @@ class BoUpSLP { /// predecessors. int getLookAheadScore(Value *LHS, Value *RHS, ArrayRef MainAltOps, int Lane, unsigned OpIdx, unsigned Idx, - bool &IsUsed) { + bool &IsUsed, const SmallBitVector &UsedLanes) { LookAheadHeuristics LookAhead(TLI, DL, SE, R, getNumLanes(), LookAheadMaxDepth); // Keep track of the instruction stack as we recurse into the operands @@ -2002,11 +2010,10 @@ class BoUpSLP { LookAhead.getScoreAtLevelRec(LHS, RHS, /*U1=*/nullptr, /*U2=*/nullptr, /*CurrLevel=*/1, MainAltOps); if (Score) { - int SplatScore = getSplatScore(Lane, OpIdx, Idx); + int SplatScore = getSplatScore(Lane, OpIdx, Idx, UsedLanes); if (Score <= -SplatScore) { - // Set the minimum score for splat-like sequence to avoid setting - // failed state. - Score = 1; + // Failed score. + Score = 0; } else { Score += SplatScore; // Scale score to see the difference between different operands @@ -2036,7 +2043,8 @@ class BoUpSLP { std::optional getBestOperand(unsigned OpIdx, int Lane, int LastLane, ArrayRef ReorderingModes, - ArrayRef MainAltOps) { + ArrayRef MainAltOps, + const SmallBitVector &UsedLanes) { unsigned NumOperands = getNumOperands(); // The operand of the previous lane at OpIdx. @@ -2092,7 +2100,7 @@ class BoUpSLP { Value *OpLeft = (LeftToRight) ? OpLastLane : Op; Value *OpRight = (LeftToRight) ? Op : OpLastLane; int Score = getLookAheadScore(OpLeft, OpRight, MainAltOps, Lane, - OpIdx, Idx, IsUsed); + OpIdx, Idx, IsUsed, UsedLanes); if (Score > static_cast(BestOp.Score) || (Score > 0 && Score == static_cast(BestOp.Score) && Idx == OpIdx)) { @@ -2507,20 +2515,24 @@ class BoUpSLP { for (unsigned I = 0; I < NumOperands; ++I) MainAltOps[I].push_back(getData(I, FirstLane).V); + SmallBitVector UsedLanes(NumLanes); + UsedLanes.set(FirstLane); for (unsigned Distance = 1; Distance != NumLanes; ++Distance) { // Visit the lane on the right and then the lane on the left. for (int Direction : {+1, -1}) { int Lane = FirstLane + Direction * Distance; if (Lane < 0 || Lane >= (int)NumLanes) continue; + UsedLanes.set(Lane); int LastLane = Lane - Direction; assert(LastLane >= 0 && LastLane < (int)NumLanes && "Out of bounds"); // Look for a good match for each operand. for (unsigned OpIdx = 0; OpIdx != NumOperands; ++OpIdx) { // Search for the operand that matches SortedOps[OpIdx][Lane-1]. 
- std::optional BestIdx = getBestOperand( - OpIdx, Lane, LastLane, ReorderingModes, MainAltOps[OpIdx]); + std::optional BestIdx = + getBestOperand(OpIdx, Lane, LastLane, ReorderingModes, + MainAltOps[OpIdx], UsedLanes); // By not selecting a value, we allow the operands that follow to // select a better matching value. We will get a non-null value in // the next run of getBestOperand(). diff --git a/llvm/test/Transforms/SLPVectorizer/X86/splat-score-adjustment.ll b/llvm/test/Transforms/SLPVectorizer/X86/splat-score-adjustment.ll index 9691cb7537a70..33fa00c1881da 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/splat-score-adjustment.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/splat-score-adjustment.ll @@ -6,29 +6,23 @@ define i32 @a() { ; CHECK-SAME: ) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: br label %[[BB1:.*]] ; CHECK: [[BB1]]: -; CHECK-NEXT: [[TMP2:%.*]] = phi i8 [ 0, [[TMP0:%.*]] ], [ [[TMP7:%.*]], %[[BB1]] ] -; CHECK-NEXT: [[TMP3:%.*]] = phi i8 [ 0, [[TMP0]] ], [ [[TMP8:%.*]], %[[BB1]] ] -; CHECK-NEXT: [[TMP4:%.*]] = phi <4 x i8> [ zeroinitializer, [[TMP0]] ], [ [[TMP6:%.*]], %[[BB1]] ] +; CHECK-NEXT: [[TMP4:%.*]] = phi <4 x i8> [ zeroinitializer, [[TMP0:%.*]] ], [ [[TMP6:%.*]], %[[BB1]] ] +; CHECK-NEXT: [[TMP3:%.*]] = phi <2 x i8> [ zeroinitializer, [[TMP0]] ], [ [[TMP17:%.*]], %[[BB1]] ] ; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x i8> [[TMP4]], <4 x i8> poison, <8 x i32> ; CHECK-NEXT: [[TMP6]] = load <4 x i8>, ptr null, align 4 -; CHECK-NEXT: [[TMP7]] = extractelement <4 x i8> [[TMP6]], i32 3 -; CHECK-NEXT: [[TMP8]] = extractelement <4 x i8> [[TMP6]], i32 2 -; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i8> [[TMP6]], i32 0 -; CHECK-NEXT: [[TMP10:%.*]] = xor i8 [[TMP9]], [[TMP3]] -; CHECK-NEXT: [[TMP11:%.*]] = extractelement <4 x i8> [[TMP6]], i32 1 -; CHECK-NEXT: [[TMP12:%.*]] = xor i8 [[TMP11]], [[TMP2]] -; CHECK-NEXT: [[TMP13:%.*]] = xor i8 [[TMP8]], [[TMP9]] -; CHECK-NEXT: [[TMP14:%.*]] = xor i8 [[TMP7]], [[TMP11]] -; CHECK-NEXT: [[TMP15:%.*]] = shufflevector <4 x i8> [[TMP6]], <4 x i8> poison, <8 x i32> -; CHECK-NEXT: [[TMP16:%.*]] = insertelement <8 x i8> [[TMP15]], i8 [[TMP10]], i32 0 -; CHECK-NEXT: [[TMP17:%.*]] = insertelement <8 x i8> [[TMP16]], i8 [[TMP12]], i32 2 -; CHECK-NEXT: [[TMP18:%.*]] = insertelement <8 x i8> [[TMP17]], i8 [[TMP13]], i32 4 +; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <4 x i8> [[TMP6]], <4 x i8> poison, <4 x i32> +; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x i8> [[TMP3]], <2 x i8> poison, <4 x i32> +; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x i8> [[TMP12]], <4 x i8> [[TMP7]], <4 x i32> +; CHECK-NEXT: [[TMP9:%.*]] = xor <4 x i8> [[TMP6]], [[TMP8]] +; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <4 x i8> [[TMP6]], <4 x i8> poison, <8 x i32> +; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <4 x i8> [[TMP9]], <4 x i8> poison, <8 x i32> +; CHECK-NEXT: [[TMP18:%.*]] = shufflevector <8 x i8> [[TMP10]], <8 x i8> [[TMP11]], <8 x i32> ; CHECK-NEXT: [[TMP19:%.*]] = shufflevector <4 x i8> [[TMP4]], <4 x i8> poison, <8 x i32> -; CHECK-NEXT: [[TMP20:%.*]] = shufflevector <8 x i8> [[TMP19]], <8 x i8> [[TMP18]], <8 x i32> -; CHECK-NEXT: [[TMP21:%.*]] = insertelement <8 x i8> [[TMP20]], i8 [[TMP14]], i32 6 +; CHECK-NEXT: [[TMP21:%.*]] = shufflevector <8 x i8> [[TMP19]], <8 x i8> [[TMP18]], <8 x i32> ; CHECK-NEXT: [[TMP22:%.*]] = xor <8 x i8> [[TMP18]], [[TMP21]] ; CHECK-NEXT: [[TMP23:%.*]] = xor <8 x i8> [[TMP22]], [[TMP5]] ; CHECK-NEXT: store <8 x i8> [[TMP23]], ptr null, align 4 +; CHECK-NEXT: [[TMP17]] = shufflevector <4 x i8> [[TMP6]], <4 x i8> poison, 
<2 x i32> ; CHECK-NEXT: br label %[[BB1]] ; br label %1 From cace9869775a185793122f845d81a5ff46f15728 Mon Sep 17 00:00:00 2001 From: Evan Wilde Date: Tue, 24 Sep 2024 14:11:42 -0700 Subject: [PATCH 046/212] Export empty vt_gen target (for standalone builds) (#109817) Fixing the standalone builds by exporting the vt_gen target so that the sources of the clangCodeGen target can form an explicit dependency edge on it. --- llvm/cmake/modules/CMakeLists.txt | 3 +++ llvm/cmake/modules/LLVMConfig.cmake.in | 3 +++ 2 files changed, 6 insertions(+) diff --git a/llvm/cmake/modules/CMakeLists.txt b/llvm/cmake/modules/CMakeLists.txt index d99af79aa38e0..ef4cfa3acdb59 100644 --- a/llvm/cmake/modules/CMakeLists.txt +++ b/llvm/cmake/modules/CMakeLists.txt @@ -36,6 +36,9 @@ endif() if(omp_gen IN_LIST LLVM_COMMON_DEPENDS) list(REMOVE_ITEM LLVM_COMMON_DEPENDS omp_gen) endif() +if(vt_gen IN_LIST LLVM_COMMON_DEPENDS) + list(REMOVE_ITEM LLVM_COMMON_DEPENDS vt_gen) +endif() # # Generate LLVMConfig.cmake for the build tree. diff --git a/llvm/cmake/modules/LLVMConfig.cmake.in b/llvm/cmake/modules/LLVMConfig.cmake.in index 7e1501a89354c..c49f10b9343ff 100644 --- a/llvm/cmake/modules/LLVMConfig.cmake.in +++ b/llvm/cmake/modules/LLVMConfig.cmake.in @@ -151,6 +151,9 @@ endif() if(NOT TARGET intrinsics_gen) add_custom_target(intrinsics_gen) endif() +if(NOT TARGET vt_gen) + add_custom_target(vt_gen) +endif() if(NOT TARGET omp_gen) add_custom_target(omp_gen) endif() From df4f828938db807fc7c4611896fe9659af482e81 Mon Sep 17 00:00:00 2001 From: Zequan Wu Date: Tue, 24 Sep 2024 14:27:09 -0700 Subject: [PATCH 047/212] [lldb][NFC] Replace lldb's DWARFFormValue::ValueType with llvm's (#109853) --- .../SymbolFile/DWARF/DWARFFormValue.cpp | 54 +++++++++---------- .../Plugins/SymbolFile/DWARF/DWARFFormValue.h | 24 +++------ 2 files changed, 34 insertions(+), 44 deletions(-) diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFFormValue.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFFormValue.cpp index e1f73f1997e36..f58c6262349c6 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFFormValue.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFFormValue.cpp @@ -25,7 +25,7 @@ using namespace lldb_private::plugin::dwarf; void DWARFFormValue::Clear() { m_unit = nullptr; m_form = dw_form_t(0); - m_value = ValueTypeTag(); + m_value = ValueType(); } bool DWARFFormValue::ExtractValue(const DWARFDataExtractor &data, @@ -44,68 +44,68 @@ bool DWARFFormValue::ExtractValue(const DWARFDataExtractor &data, switch (m_form) { case DW_FORM_addr: assert(m_unit); - m_value.value.uval = + m_value.uval = data.GetMaxU64(offset_ptr, DWARFUnit::GetAddressByteSize(m_unit)); break; case DW_FORM_block1: - m_value.value.uval = data.GetU8(offset_ptr); + m_value.uval = data.GetU8(offset_ptr); is_block = true; break; case DW_FORM_block2: - m_value.value.uval = data.GetU16(offset_ptr); + m_value.uval = data.GetU16(offset_ptr); is_block = true; break; case DW_FORM_block4: - m_value.value.uval = data.GetU32(offset_ptr); + m_value.uval = data.GetU32(offset_ptr); is_block = true; break; case DW_FORM_data16: - m_value.value.uval = 16; + m_value.uval = 16; is_block = true; break; case DW_FORM_exprloc: case DW_FORM_block: - m_value.value.uval = data.GetULEB128(offset_ptr); + m_value.uval = data.GetULEB128(offset_ptr); is_block = true; break; case DW_FORM_string: - m_value.value.cstr = data.GetCStr(offset_ptr); + m_value.cstr = data.GetCStr(offset_ptr); break; case DW_FORM_sdata: - m_value.value.sval = data.GetSLEB128(offset_ptr); + m_value.sval = 
data.GetSLEB128(offset_ptr); break; case DW_FORM_strp: case DW_FORM_line_strp: case DW_FORM_sec_offset: - m_value.value.uval = data.GetMaxU64(offset_ptr, 4); + m_value.uval = data.GetMaxU64(offset_ptr, 4); break; case DW_FORM_addrx1: case DW_FORM_strx1: case DW_FORM_ref1: case DW_FORM_data1: case DW_FORM_flag: - m_value.value.uval = data.GetU8(offset_ptr); + m_value.uval = data.GetU8(offset_ptr); break; case DW_FORM_addrx2: case DW_FORM_strx2: case DW_FORM_ref2: case DW_FORM_data2: - m_value.value.uval = data.GetU16(offset_ptr); + m_value.uval = data.GetU16(offset_ptr); break; case DW_FORM_addrx3: case DW_FORM_strx3: - m_value.value.uval = data.GetMaxU64(offset_ptr, 3); + m_value.uval = data.GetMaxU64(offset_ptr, 3); break; case DW_FORM_addrx4: case DW_FORM_strx4: case DW_FORM_ref4: case DW_FORM_data4: - m_value.value.uval = data.GetU32(offset_ptr); + m_value.uval = data.GetU32(offset_ptr); break; case DW_FORM_data8: case DW_FORM_ref8: case DW_FORM_ref_sig8: - m_value.value.uval = data.GetU64(offset_ptr); + m_value.uval = data.GetU64(offset_ptr); break; case DW_FORM_addrx: case DW_FORM_loclistx: @@ -115,7 +115,7 @@ bool DWARFFormValue::ExtractValue(const DWARFDataExtractor &data, case DW_FORM_ref_udata: case DW_FORM_GNU_str_index: case DW_FORM_GNU_addr_index: - m_value.value.uval = data.GetULEB128(offset_ptr); + m_value.uval = data.GetULEB128(offset_ptr); break; case DW_FORM_ref_addr: assert(m_unit); @@ -123,14 +123,14 @@ bool DWARFFormValue::ExtractValue(const DWARFDataExtractor &data, ref_addr_size = m_unit->GetAddressByteSize(); else ref_addr_size = 4; - m_value.value.uval = data.GetMaxU64(offset_ptr, ref_addr_size); + m_value.uval = data.GetMaxU64(offset_ptr, ref_addr_size); break; case DW_FORM_indirect: m_form = static_cast(data.GetULEB128(offset_ptr)); indirect = true; break; case DW_FORM_flag_present: - m_value.value.uval = 1; + m_value.uval = 1; break; default: return false; @@ -138,9 +138,9 @@ bool DWARFFormValue::ExtractValue(const DWARFDataExtractor &data, } while (indirect); if (is_block) { - m_value.data = data.PeekData(*offset_ptr, m_value.value.uval); + m_value.data = data.PeekData(*offset_ptr, m_value.uval); if (m_value.data != nullptr) { - *offset_ptr += m_value.value.uval; + *offset_ptr += m_value.uval; } } @@ -461,23 +461,23 @@ const char *DWARFFormValue::AsCString() const { DWARFContext &context = m_unit->GetSymbolFileDWARF().GetDWARFContext(); if (m_form == DW_FORM_string) - return m_value.value.cstr; + return m_value.cstr; if (m_form == DW_FORM_strp) - return context.getOrLoadStrData().PeekCStr(m_value.value.uval); + return context.getOrLoadStrData().PeekCStr(m_value.uval); if (m_form == DW_FORM_GNU_str_index || m_form == DW_FORM_strx || m_form == DW_FORM_strx1 || m_form == DW_FORM_strx2 || m_form == DW_FORM_strx3 || m_form == DW_FORM_strx4) { std::optional offset = - m_unit->GetStringOffsetSectionItem(m_value.value.uval); + m_unit->GetStringOffsetSectionItem(m_value.uval); if (!offset) return nullptr; return context.getOrLoadStrData().PeekCStr(*offset); } if (m_form == DW_FORM_line_strp) - return context.getOrLoadLineStrData().PeekCStr(m_value.value.uval); + return context.getOrLoadLineStrData().PeekCStr(m_value.uval); return nullptr; } @@ -495,14 +495,14 @@ dw_addr_t DWARFFormValue::Address() const { uint32_t index_size = m_unit->GetAddressByteSize(); dw_offset_t addr_base = m_unit->GetAddrBase(); - lldb::offset_t offset = addr_base + m_value.value.uval * index_size; + lldb::offset_t offset = addr_base + m_value.uval * index_size; return 
symbol_file.GetDWARFContext().getOrLoadAddrData().GetMaxU64( &offset, index_size); } std::pair DWARFFormValue::ReferencedUnitAndOffset() const { - uint64_t value = m_value.value.uval; + uint64_t value = m_value.uval; switch (m_form) { case DW_FORM_ref1: case DW_FORM_ref2: @@ -550,7 +550,7 @@ DWARFDIE DWARFFormValue::Reference() const { } uint64_t DWARFFormValue::Reference(dw_offset_t base_offset) const { - uint64_t value = m_value.value.uval; + uint64_t value = m_value.uval; switch (m_form) { case DW_FORM_ref1: case DW_FORM_ref2: diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFFormValue.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFFormValue.h index fdd5b3c278a4e..8ab9163e645fe 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFFormValue.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFFormValue.h @@ -10,7 +10,7 @@ #define LLDB_SOURCE_PLUGINS_SYMBOLFILE_DWARF_DWARFFORMVALUE_H #include "DWARFDataExtractor.h" -#include +#include "llvm/DebugInfo/DWARF/DWARFFormValue.h" #include namespace lldb_private::plugin { @@ -21,17 +21,7 @@ class DWARFDIE; class DWARFFormValue { public: - typedef struct ValueTypeTag { - ValueTypeTag() : value() { value.uval = 0; } - - union { - uint64_t uval; - int64_t sval; - const char *cstr; - } value; - const uint8_t *data = nullptr; - } ValueType; - + typedef llvm::DWARFFormValue::ValueType ValueType; enum { eValueTypeInvalid = 0, eValueTypeUnsigned, @@ -67,11 +57,11 @@ class DWARFFormValue { std::pair ReferencedUnitAndOffset() const; uint64_t Reference(dw_offset_t offset) const; - bool Boolean() const { return m_value.value.uval != 0; } - uint64_t Unsigned() const { return m_value.value.uval; } - void SetUnsigned(uint64_t uval) { m_value.value.uval = uval; } - int64_t Signed() const { return m_value.value.sval; } - void SetSigned(int64_t sval) { m_value.value.sval = sval; } + bool Boolean() const { return m_value.uval != 0; } + uint64_t Unsigned() const { return m_value.uval; } + void SetUnsigned(uint64_t uval) { m_value.uval = uval; } + int64_t Signed() const { return m_value.sval; } + void SetSigned(int64_t sval) { m_value.sval = sval; } const char *AsCString() const; dw_addr_t Address() const; bool IsValid() const { return m_form != 0; } From 5d88fd33ee03105c02130e4f130ccf87997d940e Mon Sep 17 00:00:00 2001 From: Eric Fiselier Date: Tue, 24 Sep 2024 17:44:48 -0400 Subject: [PATCH 048/212] attempt to bump actions runner bot version so I can build a new image --- libcxx/utils/ci/docker-compose.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libcxx/utils/ci/docker-compose.yml b/libcxx/utils/ci/docker-compose.yml index 795e0dc98610d..c32e016edeb15 100644 --- a/libcxx/utils/ci/docker-compose.yml +++ b/libcxx/utils/ci/docker-compose.yml @@ -21,7 +21,7 @@ services: dockerfile: Dockerfile target: actions-builder args: - BASE_IMAGE: ghcr.io/actions/actions-runner:2.317.0 + BASE_IMAGE: ghcr.io/actions/actions-runner:2.319.1 <<: *compiler_versions android-buildkite-builder: image: ghcr.io/libcxx/android-buildkite-builder:${TAG:-latest} From aeb18ebbe0a1a2fbce9b432eefed46c1d90968ea Mon Sep 17 00:00:00 2001 From: Michael Jones Date: Tue, 24 Sep 2024 14:54:02 -0700 Subject: [PATCH 049/212] [libc] Add MSAN unpoison annotations to recv funcs (#109844) Anywhere a struct is returned from the kernel, we need to explicitly unpoison it for MSAN. This patch does that for the recv, recvfrom, recvmsg, and socketpair functions. 
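
As a sketch of the underlying idea (plain POSIX recv() and the public
__msan_unpoison() stand in here for libc's internal syscall wrapper and its
MSAN_UNPOISON macro):

```cpp
#include <sanitizer/msan_interface.h>
#include <sys/socket.h>

// The kernel writes into buf behind MSAN's back, so the bytes still look
// uninitialized; unpoison exactly the count the kernel reported writing.
ssize_t recv_unpoisoned(int fd, void *buf, size_t len, int flags) {
  ssize_t ret = recv(fd, buf, len, flags);
  if (ret > 0)
    __msan_unpoison(buf, static_cast<size_t>(ret));
  return ret;
}
```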
--- libc/src/sys/socket/linux/CMakeLists.txt | 4 ++++ libc/src/sys/socket/linux/recv.cpp | 4 ++++ libc/src/sys/socket/linux/recvfrom.cpp | 4 ++++ libc/src/sys/socket/linux/recvmsg.cpp | 9 +++++++++ libc/src/sys/socket/linux/socketpair.cpp | 6 ++++-- .../libc/test/src/sys/socket/BUILD.bazel | 2 +- 6 files changed, 26 insertions(+), 3 deletions(-) diff --git a/libc/src/sys/socket/linux/CMakeLists.txt b/libc/src/sys/socket/linux/CMakeLists.txt index f21679b5f8d3c..e1226aaad381f 100644 --- a/libc/src/sys/socket/linux/CMakeLists.txt +++ b/libc/src/sys/socket/linux/CMakeLists.txt @@ -33,6 +33,7 @@ add_entrypoint_object( DEPENDS libc.include.sys_syscall libc.include.sys_socket + libc.src.__support.macros.sanitizer libc.src.__support.OSUtil.osutil libc.src.errno.errno ) @@ -87,6 +88,7 @@ add_entrypoint_object( libc.include.sys_syscall libc.hdr.types.struct_sockaddr libc.hdr.types.socklen_t + libc.src.__support.macros.sanitizer libc.src.__support.OSUtil.osutil libc.src.errno.errno ) @@ -101,6 +103,7 @@ add_entrypoint_object( libc.include.sys_syscall libc.hdr.types.struct_sockaddr libc.hdr.types.socklen_t + libc.src.__support.macros.sanitizer libc.src.__support.OSUtil.osutil libc.src.errno.errno ) @@ -114,6 +117,7 @@ add_entrypoint_object( DEPENDS libc.include.sys_syscall libc.hdr.types.struct_msghdr + libc.src.__support.macros.sanitizer libc.src.__support.OSUtil.osutil libc.src.errno.errno ) diff --git a/libc/src/sys/socket/linux/recv.cpp b/libc/src/sys/socket/linux/recv.cpp index 96acf449dc4bf..55a766aec0e77 100644 --- a/libc/src/sys/socket/linux/recv.cpp +++ b/libc/src/sys/socket/linux/recv.cpp @@ -13,6 +13,7 @@ #include "hdr/types/struct_sockaddr.h" #include "src/__support/OSUtil/syscall.h" // For internal syscall function. #include "src/__support/common.h" +#include "src/__support/macros/sanitizer.h" #include "src/errno/libc_errno.h" #include // For SYS_SOCKET socketcall number. #include // For syscall numbers. @@ -41,6 +42,9 @@ LLVM_LIBC_FUNCTION(ssize_t, recv, libc_errno = static_cast(-ret); return -1; } + + MSAN_UNPOISON(buf, ret); + return ret; } diff --git a/libc/src/sys/socket/linux/recvfrom.cpp b/libc/src/sys/socket/linux/recvfrom.cpp index 17489a99c922d..990e58da3c1b6 100644 --- a/libc/src/sys/socket/linux/recvfrom.cpp +++ b/libc/src/sys/socket/linux/recvfrom.cpp @@ -13,6 +13,7 @@ #include "hdr/types/struct_sockaddr.h" #include "src/__support/OSUtil/syscall.h" // For internal syscall function. #include "src/__support/common.h" +#include "src/__support/macros/sanitizer.h" #include "src/errno/libc_errno.h" #include // For SYS_SOCKET socketcall number. #include // For syscall numbers. @@ -43,6 +44,9 @@ LLVM_LIBC_FUNCTION(ssize_t, recvfrom, libc_errno = static_cast(-ret); return -1; } + + MSAN_UNPOISON(buf, ret); + return ret; } diff --git a/libc/src/sys/socket/linux/recvmsg.cpp b/libc/src/sys/socket/linux/recvmsg.cpp index 60045d6a80f53..f44e5800d817f 100644 --- a/libc/src/sys/socket/linux/recvmsg.cpp +++ b/libc/src/sys/socket/linux/recvmsg.cpp @@ -12,6 +12,7 @@ #include "hdr/types/struct_msghdr.h" #include "src/__support/OSUtil/syscall.h" // For internal syscall function. #include "src/__support/common.h" +#include "src/__support/macros/sanitizer.h" #include "src/errno/libc_errno.h" #include // For SYS_SOCKET socketcall number. #include // For syscall numbers. @@ -36,6 +37,14 @@ LLVM_LIBC_FUNCTION(ssize_t, recvmsg, libc_errno = static_cast(-ret); return -1; } + + // Unpoison the msghdr, as well as all its components. 
+ MSAN_UNPOISON(msg->msg_name, msg->msg_namelen); + for (size_t i = 0; i < msg->msg_iovlen; ++i) { + MSAN_UNPOISON(msg->msg_iov->iov_base, msg->msg_iov->iov_len); + } + MSAN_UNPOISON(msg->msg_control, msg->msg_controllen); + return ret; } diff --git a/libc/src/sys/socket/linux/socketpair.cpp b/libc/src/sys/socket/linux/socketpair.cpp index d459a74433906..60612ac04d613 100644 --- a/libc/src/sys/socket/linux/socketpair.cpp +++ b/libc/src/sys/socket/linux/socketpair.cpp @@ -10,10 +10,9 @@ #include "src/__support/OSUtil/syscall.h" // For internal syscall function. #include "src/__support/common.h" - #include "src/__support/macros/config.h" +#include "src/__support/macros/sanitizer.h" #include "src/errno/libc_errno.h" - #include // For SYS_SOCKET socketcall number. #include // For syscall numbers. @@ -37,6 +36,9 @@ LLVM_LIBC_FUNCTION(int, socketpair, libc_errno = -ret; return -1; } + + MSAN_UNPOISON(sv, sizeof(int) * 2); + return ret; } diff --git a/utils/bazel/llvm-project-overlay/libc/test/src/sys/socket/BUILD.bazel b/utils/bazel/llvm-project-overlay/libc/test/src/sys/socket/BUILD.bazel index 865f5e6f49617..f7bce45d07da6 100644 --- a/utils/bazel/llvm-project-overlay/libc/test/src/sys/socket/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/libc/test/src/sys/socket/BUILD.bazel @@ -2,7 +2,7 @@ # See https://llvm.org/LICENSE.txt for license information. # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -# Tests for LLVM libc string.h functions. +# Tests for LLVM libc socket.h functions. load("//libc/test:libc_test_rules.bzl", "libc_test") From 7bd4f1a0ed64aaf13e6acdafaaf50c4e7bf1d4dc Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Tue, 24 Sep 2024 15:32:26 -0700 Subject: [PATCH 050/212] [memprof] Use BLAKE for FrameId (#109501) This patch uses a stronger hash function for FrameId. Without this patch, I've observed hash collisions in fairly common scenarios. Specifically, for a given GUID, I see three pairs of hash collisions in the two-dimensional range 1 <= LineOffset <= 70 and 1 <= Column <= 50, which may be a bit too frequent. With the new function, I don't see collisions at all even for a large profile. Impact on serialization/deserialization should be as follows: - Up to indexed memprof format Version 2, inclusive: The FrameIds computed with the new hash function will show up as part of the profile file. However, the deserializer only treats FrameIds as keys (but not hash values) to retrieve Frames from the on-disk hash table, so a change of the hash function shouldn't matter. - For indexed memprof format Version 3, FrameIds do not show up at all in the resulting profile file. FrameIds are only used to break ties when we sort Frames in writeMemProfFrameArray. --- llvm/include/llvm/ProfileData/MemProf.h | 31 +++++++++++-------------- 1 file changed, 14 insertions(+), 17 deletions(-) diff --git a/llvm/include/llvm/ProfileData/MemProf.h b/llvm/include/llvm/ProfileData/MemProf.h index 892865a0a26fe..f8121d3573251 100644 --- a/llvm/include/llvm/ProfileData/MemProf.h +++ b/llvm/include/llvm/ProfileData/MemProf.h @@ -7,8 +7,10 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/IR/GlobalValue.h" #include "llvm/ProfileData/MemProfData.inc" +#include "llvm/Support/BLAKE3.h" #include "llvm/Support/Endian.h" #include "llvm/Support/EndianStream.h" +#include "llvm/Support/HashBuilder.h" #include "llvm/Support/raw_ostream.h" #include @@ -308,24 +310,19 @@ struct Frame { << " Inline: " << IsInlineFrame << "\n"; } - // Return a hash value based on the contents of the frame. 
Here we don't use
-  // hashing from llvm ADT since we are going to persist the hash id, the hash
-  // combine algorithm in ADT uses a new randomized seed each time.
+  // Return a hash value based on the contents of the frame. Here we use a
+  // cryptographic hash function to minimize the chance of hash collisions. We
+  // do persist FrameIds as part of memprof formats up to Version 2, inclusive.
+  // However, the deserializer never calls this function; it uses FrameIds
+  // merely as keys to look up Frames proper.
   inline FrameId hash() const {
-    auto HashCombine = [](auto Value, size_t Seed) {
-      std::hash<decltype(Value)> Hasher;
-      // The constant used below is the 64 bit representation of the fractional
-      // part of the golden ratio. Used here for the randomness in their bit
-      // pattern.
-      return Hasher(Value) + 0x9e3779b97f4a7c15 + (Seed << 6) + (Seed >> 2);
-    };
-
-    size_t Result = 0;
-    Result ^= HashCombine(Function, Result);
-    Result ^= HashCombine(LineOffset, Result);
-    Result ^= HashCombine(Column, Result);
-    Result ^= HashCombine(IsInlineFrame, Result);
-    return static_cast<FrameId>(Result);
+    llvm::HashBuilder<llvm::TruncatedBLAKE3<8>, llvm::endianness::little>
+        HashBuilder;
+    HashBuilder.add(Function, LineOffset, Column, IsInlineFrame);
+    llvm::BLAKE3Result<8> Hash = HashBuilder.final();
+    FrameId Id;
+    std::memcpy(&Id, Hash.data(), sizeof(Hash));
+    return Id;
   }
 };

From ce1f01b887ea5945ec3ffb45e05e674804d5d7a7 Mon Sep 17 00:00:00 2001
From: Jason Molenda
Date: Tue, 24 Sep 2024 15:42:10 -0700
Subject: [PATCH 051/212] [lldb][NFC] Add the UnwindPlan method for the header
 update

In 206408732bca2ef464732a39c8319d47c8a1dbea I updated the UnwindPlan
header to include a new method, but didn't add the actual
implementation. Fix that.
---
 lldb/source/Symbol/UnwindPlan.cpp | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/lldb/source/Symbol/UnwindPlan.cpp b/lldb/source/Symbol/UnwindPlan.cpp
index b5a9aa2094f54..a06e7cfd7f544 100644
--- a/lldb/source/Symbol/UnwindPlan.cpp
+++ b/lldb/source/Symbol/UnwindPlan.cpp
@@ -354,6 +354,17 @@ bool UnwindPlan::Row::SetRegisterLocationToSame(uint32_t reg_num,
   return true;
 }
 
+bool UnwindPlan::Row::SetRegisterLocationToIsDWARFExpression(
+    uint32_t reg_num, const uint8_t *opcodes, uint32_t len, bool can_replace) {
+  if (!can_replace &&
+      m_register_locations.find(reg_num) != m_register_locations.end())
+    return false;
+  AbstractRegisterLocation reg_loc;
+  reg_loc.SetIsDWARFExpression(opcodes, len);
+  m_register_locations[reg_num] = reg_loc;
+  return true;
+}
+
 bool UnwindPlan::Row::SetRegisterLocationToIsConstant(uint32_t reg_num,
                                                       uint64_t constant,
                                                       bool can_replace) {

From ab0bd344d6556634646001f720e9a0b47801a1a4 Mon Sep 17 00:00:00 2001
From: David CARLIER
Date: Tue, 24 Sep 2024 23:47:42 +0100
Subject: [PATCH 052/212] [compiler-rt] Fix #109834 for Android (#109876)

---
 .../lib/sanitizer_common/sanitizer_common_interceptors.inc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc b/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc
index e3a329712ac5a..b382e7a61950c 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc
@@ -1289,9 +1289,9 @@ INTERCEPTOR(int, prctl, int option, unsigned long arg2, unsigned long arg3,
   static const int PR_SCHED_CORE = 62;
   static const int PR_SCHED_CORE_GET = 0;
   static const int PR_GET_PDEATHSIG = 2;
-  static const int PR_SET_SECCOMP = 22;
 
 # if !SANITIZER_ANDROID
+ 
static const int PR_SET_SECCOMP = 22; static const int SECCOMP_MODE_FILTER = 2; # endif if (option == PR_SET_VMA && arg2 == 0UL) { From 98260a1a2786f5e639fbcf7a1f3540554f0692f0 Mon Sep 17 00:00:00 2001 From: Philip Reames Date: Tue, 24 Sep 2024 14:49:35 -0700 Subject: [PATCH 053/212] [RISCV] Add reversed select testcase for canSplatOperand [nfc] --- .../CodeGen/RISCV/rvv/sink-splat-operands.ll | 42 ++++++++++++++++++- 1 file changed, 40 insertions(+), 2 deletions(-) diff --git a/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll b/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll index d1c98f828e76d..abf89361cdea5 100644 --- a/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll +++ b/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll @@ -5424,8 +5424,8 @@ for.cond.cleanup: ; preds = %vector.body ret void } -define void @sink_splat_select(ptr nocapture %a, i32 signext %x) { -; CHECK-LABEL: sink_splat_select: +define void @sink_splat_select_op1(ptr nocapture %a, i32 signext %x) { +; CHECK-LABEL: sink_splat_select_op1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: lui a2, 1 ; CHECK-NEXT: add a2, a0, a2 @@ -5460,3 +5460,41 @@ vector.body: ; preds = %vector.body, %entry for.cond.cleanup: ; preds = %vector.body ret void } + +define void @sink_splat_select_op2(ptr nocapture %a, i32 signext %x) { +; CHECK-LABEL: sink_splat_select_op2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-NEXT: vmv.v.x v8, a1 +; CHECK-NEXT: lui a1, 1 +; CHECK-NEXT: add a1, a0, a1 +; CHECK-NEXT: li a2, 42 +; CHECK-NEXT: .LBB118_1: # %vector.body +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: vle32.v v9, (a0) +; CHECK-NEXT: vmseq.vx v0, v9, a2 +; CHECK-NEXT: vmerge.vvm v9, v8, v9, v0 +; CHECK-NEXT: vse32.v v9, (a0) +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: bne a0, a1, .LBB118_1 +; CHECK-NEXT: # %bb.2: # %for.cond.cleanup +; CHECK-NEXT: ret +entry: + %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0 + %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer + br label %vector.body + +vector.body: ; preds = %vector.body, %entry + %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ] + %0 = getelementptr inbounds i32, ptr %a, i64 %index + %load = load <4 x i32>, ptr %0, align 4 + %cond = icmp eq <4 x i32> %load, splat (i32 42) + %1 = select <4 x i1> %cond, <4 x i32> %load, <4 x i32> %broadcast.splat + store <4 x i32> %1, ptr %0, align 4 + %index.next = add nuw i64 %index, 4 + %2 = icmp eq i64 %index.next, 1024 + br i1 %2, label %for.cond.cleanup, label %vector.body + +for.cond.cleanup: ; preds = %vector.body + ret void +} From 6c7134b2667b3abbbb3f56352a0020f398994d14 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Tue, 24 Sep 2024 15:54:02 -0700 Subject: [PATCH 054/212] [RISCV] Don't create MachineMemOperand in foldMemoryOperandImpl. (#109840) The caller already does this after we return. I think it will overwrite any MMO we add. I'm the original author of this code and I'm not sure why I did it. 
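
For context, a rough sketch (paraphrased, not the verbatim generic code) of
what the TargetInstrInfo::foldMemoryOperand caller does after this hook
returns, which is what made the MMO created here redundant:

```cpp
// Generic caller (sketch): NewMI is the instruction the target hook built;
// FI is the spill slot's frame index. The caller attaches the frame-index
// memory operand itself, superseding any MMO added inside the hook.
MachineMemOperand *MMO = MF.getMachineMemOperand(
    MachinePointerInfo::getFixedStack(MF, FI), Flags,
    MFI.getObjectSize(FI), MFI.getObjectAlign(FI));
NewMI->addMemOperand(MF, MMO);
```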
--- llvm/lib/Target/RISCV/RISCVInstrInfo.cpp | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp index 41f93fde17d32..b594531ccb095 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp @@ -736,8 +736,6 @@ MachineInstr *RISCVInstrInfo::foldMemoryOperandImpl( MachineFunction &MF, MachineInstr &MI, ArrayRef Ops, MachineBasicBlock::iterator InsertPt, int FrameIndex, LiveIntervals *LIS, VirtRegMap *VRM) const { - const MachineFrameInfo &MFI = MF.getFrameInfo(); - // The below optimizations narrow the load so they are only valid for little // endian. // TODO: Support big endian by adding an offset into the frame object? @@ -776,17 +774,11 @@ MachineInstr *RISCVInstrInfo::foldMemoryOperandImpl( break; } - MachineMemOperand *MMO = MF.getMachineMemOperand( - MachinePointerInfo::getFixedStack(MF, FrameIndex), - MachineMemOperand::MOLoad, MFI.getObjectSize(FrameIndex), - MFI.getObjectAlign(FrameIndex)); - Register DstReg = MI.getOperand(0).getReg(); return BuildMI(*MI.getParent(), InsertPt, MI.getDebugLoc(), get(LoadOpc), DstReg) .addFrameIndex(FrameIndex) - .addImm(0) - .addMemOperand(MMO); + .addImm(0); } void RISCVInstrInfo::movImm(MachineBasicBlock &MBB, From 1f9ca897987358053374b724444c2aa396e51032 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Tue, 24 Sep 2024 15:54:49 -0700 Subject: [PATCH 055/212] [RISCV] Don't create insert/extract subreg during lowering. (#109754) Create the equivalent INSERT_SUBVECTOR/EXTRACT_SUBVECTOR instead. When we tried porting this to global isel, we noticed that subreg operations are created early. We aren't able to do this until instruction selection in global isel. For SelectionDAG, it makes sense to use insert/extract_subvector as the canonical form for these operations pre-isel. If it had come into SelectionDAG as a insert/extract_subvector we would have kept it in that form. --- llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 27 +++++++++++++++++---- 1 file changed, 22 insertions(+), 5 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index bf822eb2c6eeb..1a52f927d69f7 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -10299,8 +10299,12 @@ SDValue RISCVTargetLowering::lowerINSERT_SUBVECTOR(SDValue Op, return Op; } + // Use a insert_subvector that will resolve to an insert subreg. + assert(VLen); + unsigned Vscale = *VLen / RISCV::RVVBitsPerBlock; SDValue Insert = - DAG.getTargetInsertSubreg(SubRegIdx, DL, ContainerVecVT, Vec, SubVec); + DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVecVT, Vec, SubVec, + DAG.getConstant(OrigIdx / Vscale, DL, XLenVT)); if (VecVT.isFixedLengthVector()) Insert = convertFromScalableVector(VecVT, Insert, DAG, Subtarget); return Insert; @@ -10316,8 +10320,10 @@ SDValue RISCVTargetLowering::lowerINSERT_SUBVECTOR(SDValue Op, MVT InterSubVT = ContainerVecVT; SDValue AlignedExtract = Vec; unsigned AlignedIdx = OrigIdx - RemIdx.getKnownMinValue(); - if (SubVecVT.isFixedLengthVector()) + if (SubVecVT.isFixedLengthVector()) { + assert(VLen); AlignedIdx /= *VLen / RISCV::RVVBitsPerBlock; + } if (ContainerVecVT.bitsGT(getLMUL1VT(ContainerVecVT))) { InterSubVT = getLMUL1VT(ContainerVecVT); // Extract a subvector equal to the nearest full vector register type. 
@@ -10494,10 +10500,14 @@ SDValue RISCVTargetLowering::lowerEXTRACT_SUBVECTOR(SDValue Op,
 
   // If the Idx has been completely eliminated then this is a subvector extract
   // which naturally aligns to a vector register. These can easily be handled
-  // using subregister manipulation.
+  // using subregister manipulation. We use an extract_subvector that will
+  // resolve to an extract subreg.
   if (RemIdx.isZero()) {
     if (SubVecVT.isFixedLengthVector()) {
-      Vec = DAG.getTargetExtractSubreg(SubRegIdx, DL, ContainerSubVecVT, Vec);
+      assert(VLen);
+      unsigned Vscale = *VLen / RISCV::RVVBitsPerBlock;
+      Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ContainerSubVecVT, Vec,
+                        DAG.getConstant(OrigIdx / Vscale, DL, XLenVT));
       return convertFromScalableVector(SubVecVT, Vec, DAG, Subtarget);
     }
     return Op;
@@ -10515,9 +10525,16 @@ SDValue RISCVTargetLowering::lowerEXTRACT_SUBVECTOR(SDValue Op,
   if (VecVT.bitsGT(getLMUL1VT(VecVT))) {
     // If VecVT has an LMUL > 1, then SubVecVT should have a smaller LMUL, and
     // we should have successfully decomposed the extract into a subregister.
+    // We use an extract_subvector that will resolve to a subreg extract.
     assert(SubRegIdx != RISCV::NoSubRegister);
+    unsigned Idx = OrigIdx - RemIdx.getKnownMinValue();
+    if (SubVecVT.isFixedLengthVector()) {
+      assert(VLen);
+      Idx /= *VLen / RISCV::RVVBitsPerBlock;
+    }
     InterSubVT = getLMUL1VT(VecVT);
-    Vec = DAG.getTargetExtractSubreg(SubRegIdx, DL, InterSubVT, Vec);
+    Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InterSubVT, Vec,
+                      DAG.getConstant(Idx, DL, XLenVT));
   }
 
   // Slide this vector register down by the desired number of elements in order

From 4c4fb6ada7a168e5129a22efb4d604bb6fc60b17 Mon Sep 17 00:00:00 2001
From: yonghong-song
Date: Tue, 24 Sep 2024 15:55:50 -0700
Subject: [PATCH 056/212] [BPF] Do atomic_fetch_*() pattern matching with
 memory ordering (#107343)

Three commits in this pull request:

commit 1: implement pattern matching for the memory orderings seq_cst,
acq_rel, release, acquire and monotonic. Specifically, for monotonic
memory ordering (the relaxed memory model), if no return value is used,
a locked insn is used.

commit 2: add support for handling the dwarf atomic modifier in BTF
generation. In effect, the atomic modifier is ignored in BTF.

commit 3: add tests for the new atomic ordering support and for BTF
support with the _Atomic type.

I removed the RFC tag as the patch set is now in a reasonable state.

For atomic fetch_and_*() operations, do pattern matching with the
memory orderings seq_cst, acq_rel, release, acquire and monotonic
(relaxed). For fetch_and_*() operations with seq_cst/acq_rel/release/
acquire ordering, atomic_fetch_*() instructions are generated. For
monotonic ordering, locked insns are generated if the return value is
not used; otherwise, atomic_fetch_*() insns are used. The main
motivation is to resolve the kernel issue [1].

The following memory orderings are supported:
  seq_cst, acq_rel, release, acquire, relaxed

Current gcc-style __sync_fetch_and_*() operations are all seq_cst. To
use explicit memory ordering, the _Atomic type is needed.

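[Editor's note] As a compact restatement of the selection rule above (an editor's sketch, not code from the patch; `Ordering` and `useLockedInsn` are illustrative names): only a relaxed (monotonic) operation whose result is unused may be lowered to a plain locked instruction; every other combination requires an atomic_fetch_*() insn.

```
// Hedged sketch of the lowering rule described above.
enum class Ordering { Monotonic, Acquire, Release, AcqRel, SeqCst };

// Returns true when the cheaper locked insn (e.g. "lock *(u32 *)(r1 + 0)
// &= w2") is allowed; otherwise atomic_fetch_*() must be generated.
bool useLockedInsn(Ordering O, bool ResultUsed) {
  return O == Ordering::Monotonic && !ResultUsed;
}
```

This mirrors the binop_no_use/binop_has_use PatFrag split in the TableGen patterns further below, which test SDValue(N, 0).use_empty(). The author's examples that follow show the resulting machine code for each case.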
The following is an example:

```
$ cat test.c
#include <stdatomic.h>
void f1(_Atomic int *i) {
  (void)__c11_atomic_fetch_and(i, 10, memory_order_relaxed);
}
void f2(_Atomic int *i) {
  (void)__c11_atomic_fetch_and(i, 10, memory_order_acquire);
}
void f3(_Atomic int *i) {
  (void)__c11_atomic_fetch_and(i, 10, memory_order_seq_cst);
}
$ cat run.sh
clang -I/home/yhs/work/bpf-next/tools/testing/selftests/bpf -O2 --target=bpf -c test.c -o test.o && llvm-objdump -d test.o
$ ./run.sh
test.o: file format elf64-bpf

Disassembly of section .text:

0000000000000000 <f1>:
       0: b4 02 00 00 0a 00 00 00 w2 = 0xa
       1: c3 21 00 00 50 00 00 00 lock *(u32 *)(r1 + 0x0) &= w2
       2: 95 00 00 00 00 00 00 00 exit

0000000000000018 <f2>:
       3: b4 02 00 00 0a 00 00 00 w2 = 0xa
       4: c3 21 00 00 51 00 00 00 w2 = atomic_fetch_and((u32 *)(r1 + 0x0), w2)
       5: 95 00 00 00 00 00 00 00 exit

0000000000000030 <f3>:
       6: b4 02 00 00 0a 00 00 00 w2 = 0xa
       7: c3 21 00 00 51 00 00 00 w2 = atomic_fetch_and((u32 *)(r1 + 0x0), w2)
       8: 95 00 00 00 00 00 00 00 exit
```

The following is another example where the return value is used:

```
$ cat test1.c
#include <stdatomic.h>
int f1(_Atomic int *i) {
  return __c11_atomic_fetch_and(i, 10, memory_order_relaxed);
}
int f2(_Atomic int *i) {
  return __c11_atomic_fetch_and(i, 10, memory_order_acquire);
}
int f3(_Atomic int *i) {
  return __c11_atomic_fetch_and(i, 10, memory_order_seq_cst);
}
$ cat run.sh
clang -I/home/yhs/work/bpf-next/tools/testing/selftests/bpf -O2 --target=bpf -c test1.c -o test1.o && llvm-objdump -d test1.o
$ ./run.sh
test.o: file format elf64-bpf

Disassembly of section .text:

0000000000000000 <f1>:
       0: b4 00 00 00 0a 00 00 00 w0 = 0xa
       1: c3 01 00 00 51 00 00 00 w0 = atomic_fetch_and((u32 *)(r1 + 0x0), w0)
       2: 95 00 00 00 00 00 00 00 exit

0000000000000018 <f2>:
       3: b4 00 00 00 0a 00 00 00 w0 = 0xa
       4: c3 01 00 00 51 00 00 00 w0 = atomic_fetch_and((u32 *)(r1 + 0x0), w0)
       5: 95 00 00 00 00 00 00 00 exit

0000000000000030 <f3>:
       6: b4 00 00 00 0a 00 00 00 w0 = 0xa
       7: c3 01 00 00 51 00 00 00 w0 = atomic_fetch_and((u32 *)(r1 + 0x0), w0)
       8: 95 00 00 00 00 00 00 00 exit
```

You can see that for relaxed memory ordering, if the return value is
used, the atomic_fetch_and() insn is used; otherwise, if the return
value is not used, the locked insn is used.

Here is another example with a global _Atomic variable:

```
$ cat test3.c
#include <stdatomic.h>

_Atomic int i;

void f1(void) {
  (void)__c11_atomic_fetch_and(&i, 10, memory_order_relaxed);
}
void f2(void) {
  (void)__c11_atomic_fetch_and(&i, 10, memory_order_seq_cst);
}
$ cat run.sh
clang -I/home/yhs/work/bpf-next/tools/testing/selftests/bpf -O2 --target=bpf -c test3.c -o test3.o && llvm-objdump -d test3.o
$ ./run.sh
test3.o: file format elf64-bpf

Disassembly of section .text:

0000000000000000 <f1>:
       0: b4 01 00 00 0a 00 00 00 w1 = 0xa
       1: 18 02 00 00 00 00 00 00 00 00 00 00 00 00 00 00 r2 = 0x0 ll
       3: c3 12 00 00 50 00 00 00 lock *(u32 *)(r2 + 0x0) &= w1
       4: 95 00 00 00 00 00 00 00 exit

0000000000000028 <f2>:
       5: b4 01 00 00 0a 00 00 00 w1 = 0xa
       6: 18 02 00 00 00 00 00 00 00 00 00 00 00 00 00 00 r2 = 0x0 ll
       8: c3 12 00 00 51 00 00 00 w1 = atomic_fetch_and((u32 *)(r2 + 0x0), w1)
       9: 95 00 00 00 00 00 00 00 exit
```

Note that in the above compilations, '-g' is not used. The reason is
the following IR related to the _Atomic type:

```
$ clang -I/home/yhs/work/bpf-next/tools/testing/selftests/bpf -O2 --target=bpf -g -S -emit-llvm test3.c
```

The related debug info for test3.c:

```
!0 = !DIGlobalVariableExpression(var: !1, expr: !DIExpression())
!1 = distinct !DIGlobalVariable(name: "i", scope: !2, file: !3, line: 3, type: !16, isLocal: false, isDefinition: true)
...
!16 = !DIDerivedType(tag: DW_TAG_atomic_type, baseType: !17)
!17 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
```

If compiling test.c, the related debug info:

```
...
!19 = distinct !DISubprogram(name: "f1", scope: !1, file: !1, line: 3, type: !20, scopeLine: 3, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !25)
!20 = !DISubroutineType(types: !21)
!21 = !{null, !22}
!22 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !23, size: 64)
!23 = !DIDerivedType(tag: DW_TAG_atomic_type, baseType: !24)
!24 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
!25 = !{!26}
!26 = !DILocalVariable(name: "i", arg: 1, scope: !19, file: !1, line: 3, type: !22)
```

All of the above suggests that _Atomic behaves like a modifier (e.g.
const, restrict, volatile). This seems true based on the DWARF doc [2].

Without properly handling DW_TAG_atomic_type, llvm BTF generation will
be incorrect, since the current implementation assumes no existence of
DW_TAG_atomic_type. So we have two choices here:

  (1) the llvm bpf backend processes DW_TAG_atomic_type but ignores it
      in the BTF encoding, or
  (2) another type, e.g. BTF_KIND_ATOMIC, is added to BTF, behaving as
      a modifier like const/volatile/restrict.

For choice (1), the llvm bpf backend should skip
dwarf::DW_TAG_atomic_type during BTF generation whenever necessary.
For choice (2), BTF_KIND_ATOMIC would be added to BTF, so both the
llvm backend and the kernel would need to handle it properly. Its main
advantage is probably that the atomic type is preserved, so it is also
available to the skeleton. But I think for the skeleton a raw type is
good enough, unless user space intends to do some atomic operation
with it, which is an unlikely case.

So I chose choice (1) in this RFC implementation. See the commit
message of the second commit for details.
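[Editor's note] To make choice (1) concrete, here is a hedged sketch of what "skip DW_TAG_atomic_type" means; `DIType` is a toy stand-in for llvm::DIDerivedType, and the actual patch implements this idea as tryRemoveAtomicType() in BTFDebug.cpp below.

```
// Hedged sketch of choice (1): BTF generation treats the DWARF atomic
// modifier as transparent and encodes its base type instead.
#include <cstdint>

constexpr uint16_t DW_TAG_atomic_type = 0x47;  // DWARF v5 tag value

struct DIType {
  uint16_t Tag;
  const DIType *BaseType;  // modifier chain, e.g. volatile -> _Atomic -> int
};

const DIType *stripAtomic(const DIType *Ty) {
  // Unlike const/volatile/restrict, BTF has no kind for _Atomic, so the
  // modifier is simply skipped rather than encoded.
  while (Ty && Ty->Tag == DW_TAG_atomic_type)
    Ty = Ty->BaseType;
  return Ty;
}
```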
[1] https://lore.kernel.org/bpf/7b941f53-2a05-48ec-9032-8f106face3a3@linux.dev/
[2] https://dwarfstd.org/issues/131112.1.html

---------
---
 clang/lib/Basic/Targets/BPF.cpp               |   1 +
 clang/lib/CodeGen/CGDebugInfo.cpp             |   6 +-
 clang/test/CodeGen/bpf-attr-type-tag-atomic.c |  16 +
 llvm/lib/Target/BPF/BPFInstrInfo.td           | 134 ++-
 llvm/lib/Target/BPF/BPFMIChecking.cpp         |   6 +-
 llvm/lib/Target/BPF/BTFDebug.cpp              |  29 +-
 llvm/test/CodeGen/BPF/BTF/atomics.ll          | 151 ++++
 llvm/test/CodeGen/BPF/BTF/print_btf.py        | 295 +++++++
 llvm/test/CodeGen/BPF/atomics_mem_order_v1.ll | 385 +++++++++
 llvm/test/CodeGen/BPF/atomics_mem_order_v3.ll | 781 ++++++++++++++++++
 .../CodeGen/BPF/atomics_sub64_relaxed_v1.ll   |  27 +
 llvm/test/CodeGen/BPF/xaddd_v1.ll             |  25 +
 12 files changed, 1827 insertions(+), 29 deletions(-)
 create mode 100644 clang/test/CodeGen/bpf-attr-type-tag-atomic.c
 create mode 100644 llvm/test/CodeGen/BPF/BTF/atomics.ll
 create mode 100644 llvm/test/CodeGen/BPF/BTF/print_btf.py
 create mode 100644 llvm/test/CodeGen/BPF/atomics_mem_order_v1.ll
 create mode 100644 llvm/test/CodeGen/BPF/atomics_mem_order_v3.ll
 create mode 100644 llvm/test/CodeGen/BPF/atomics_sub64_relaxed_v1.ll
 create mode 100644 llvm/test/CodeGen/BPF/xaddd_v1.ll

diff --git a/clang/lib/Basic/Targets/BPF.cpp b/clang/lib/Basic/Targets/BPF.cpp
index 931f407ecb0d7..f4684765b7ffb 100644
--- a/clang/lib/Basic/Targets/BPF.cpp
+++ b/clang/lib/Basic/Targets/BPF.cpp
@@ -38,6 +38,7 @@ void BPFTargetInfo::getTargetDefines(const LangOptions &Opts,
   Builder.defineMacro("__BPF_FEATURE_ADDR_SPACE_CAST");
   Builder.defineMacro("__BPF_FEATURE_MAY_GOTO");
+  Builder.defineMacro("__BPF_FEATURE_ATOMIC_MEM_ORDERING");
 
   if (CPU.empty())
     CPU = "v3";
diff --git a/clang/lib/CodeGen/CGDebugInfo.cpp b/clang/lib/CodeGen/CGDebugInfo.cpp
index 2d2c280941bd6..4782e80f22177 100644
--- a/clang/lib/CodeGen/CGDebugInfo.cpp
+++ b/clang/lib/CodeGen/CGDebugInfo.cpp
@@ -1249,8 +1249,12 @@ llvm::DIType *CGDebugInfo::CreatePointerLikeType(llvm::dwarf::Tag Tag,
           CGM.getTarget().getDWARFAddressSpace(
               CGM.getTypes().getTargetAddressSpace(PointeeTy));
 
+  const BTFTagAttributedType *BTFAttrTy;
+  if (auto *Atomic = PointeeTy->getAs<AtomicType>())
+    BTFAttrTy = dyn_cast<BTFTagAttributedType>(Atomic->getValueType());
+  else
+    BTFAttrTy = dyn_cast<BTFTagAttributedType>(PointeeTy);
   SmallVector<llvm::Metadata *, 4> Annots;
-  auto *BTFAttrTy = dyn_cast<BTFTagAttributedType>(PointeeTy);
   while (BTFAttrTy) {
     StringRef Tag = BTFAttrTy->getAttr()->getBTFTypeTag();
     if (!Tag.empty()) {
diff --git a/clang/test/CodeGen/bpf-attr-type-tag-atomic.c b/clang/test/CodeGen/bpf-attr-type-tag-atomic.c
new file mode 100644
index 0000000000000..a10a45dc0808d
--- /dev/null
+++ b/clang/test/CodeGen/bpf-attr-type-tag-atomic.c
@@ -0,0 +1,16 @@
+// REQUIRES: bpf-registered-target
+// RUN: %clang_cc1 -triple bpf -emit-llvm -disable-llvm-passes -debug-info-kind=limited %s -o - | FileCheck %s
+
+#define __tag1 __attribute__((btf_type_tag("tag1")))
+int _Atomic __tag1 *g1;
+volatile int _Atomic __tag1 *g2;
+
+// CHECK: distinct !DIGlobalVariable(name: "g1", scope: ![[#]], file: ![[#]], line: [[#]], type: ![[PTR1:[0-9]+]]
+// CHECK: distinct !DIGlobalVariable(name: "g2", scope: ![[#]], file: ![[#]], line: [[#]], type: ![[PTR2:[0-9]+]]
+// CHECK: ![[PTR2]] = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: ![[BASE2:[0-9]+]], size: [[#]], annotations: ![[ANNOT:[0-9]+]])
+// CHECK: ![[BASE2]] = !DIDerivedType(tag: DW_TAG_volatile_type, baseType: ![[BASE1:[0-9]+]])
+// CHECK: ![[BASE1]] = !DIDerivedType(tag: DW_TAG_atomic_type, baseType: ![[BASIC:[0-9]+]])
+// CHECK: ![[BASIC]] = !DIBasicType(name: "int", size: [[#]], encoding: DW_ATE_signed)
+// CHECK: ![[ANNOT]] = !{![[ENTRY:[0-9]+]]} +// CHECK: ![[ENTRY]] = !{!"btf_type_tag", !"tag1"} +// CHECK: ![[PTR1]] = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: ![[BASE1]], size: [[#]], annotations: ![[ANNOT]]) diff --git a/llvm/lib/Target/BPF/BPFInstrInfo.td b/llvm/lib/Target/BPF/BPFInstrInfo.td index f7e17901c7ed5..62d6e25f83b59 100644 --- a/llvm/lib/Target/BPF/BPFInstrInfo.td +++ b/llvm/lib/Target/BPF/BPFInstrInfo.td @@ -826,13 +826,12 @@ let Predicates = [BPFNoALU32] in { } // Atomic Fetch-and- operations -class XFALU64 +class XFALU64 : TYPE_LD_ST { + []> { bits<4> dst; bits<20> addr; @@ -844,13 +843,12 @@ class XFALU64 +class XFALU32 : TYPE_LD_ST { + []> { bits<4> dst; bits<20> addr; @@ -864,26 +862,122 @@ class XFALU32; - def XFANDW32 : XFALU32; - def XFORW32 : XFALU32; - def XFXORW32 : XFALU32; + def XFADDW32 : XFALU32; + def XFANDW32 : XFALU32; + def XFORW32 : XFALU32; + def XFXORW32 : XFALU32; } let Predicates = [BPFHasALU32] in { - def XFADDD : XFALU64; + def XFADDD : XFALU64; } - def XFANDD : XFALU64; - def XFORD : XFALU64; - def XFXORD : XFALU64; + def XFANDD : XFALU64; + def XFORD : XFALU64; + def XFXORD : XFALU64; } -// atomic_load_sub can be represented as a neg followed -// by an atomic_load_add. -def : Pat<(atomic_load_sub_i32 ADDRri:$addr, GPR32:$val), - (XFADDW32 ADDRri:$addr, (NEG_32 GPR32:$val))>; -def : Pat<(atomic_load_sub_i64 ADDRri:$addr, GPR:$val), - (XFADDD ADDRri:$addr, (NEG_64 GPR:$val))>; +let Predicates = [BPFHasALU32] in { + foreach P = [// add + [atomic_load_add_i32_monotonic, XADDW32], + [atomic_load_add_i32_acquire, XFADDW32], + [atomic_load_add_i32_release, XFADDW32], + [atomic_load_add_i32_acq_rel, XFADDW32], + [atomic_load_add_i32_seq_cst, XFADDW32], + // and + [atomic_load_and_i32_monotonic, XANDW32], + [atomic_load_and_i32_acquire, XFANDW32], + [atomic_load_and_i32_release, XFANDW32], + [atomic_load_and_i32_acq_rel, XFANDW32], + [atomic_load_and_i32_seq_cst, XFANDW32], + // or + [atomic_load_or_i32_monotonic, XORW32], + [atomic_load_or_i32_acquire, XFORW32], + [atomic_load_or_i32_release, XFORW32], + [atomic_load_or_i32_acq_rel, XFORW32], + [atomic_load_or_i32_seq_cst, XFORW32], + // xor + [atomic_load_xor_i32_monotonic, XXORW32], + [atomic_load_xor_i32_acquire, XFXORW32], + [atomic_load_xor_i32_release, XFXORW32], + [atomic_load_xor_i32_acq_rel, XFXORW32], + [atomic_load_xor_i32_seq_cst, XFXORW32], + ] in { + def : Pat<(P[0] ADDRri:$addr, GPR32:$val), (P[1] ADDRri:$addr, GPR32:$val)>; + } + + // atomic_load_sub can be represented as a neg followed + // by an atomic_load_add. 
+ foreach P = [[atomic_load_sub_i32_monotonic, XADDW32], + [atomic_load_sub_i32_acquire, XFADDW32], + [atomic_load_sub_i32_release, XFADDW32], + [atomic_load_sub_i32_acq_rel, XFADDW32], + [atomic_load_sub_i32_seq_cst, XFADDW32], + ] in { + def : Pat<(P[0] ADDRri:$addr, GPR32:$val), (P[1] ADDRri:$addr, (NEG_32 GPR32:$val))>; + } + + foreach P = [// add + [atomic_load_add_i64_monotonic, XADDD], + [atomic_load_add_i64_acquire, XFADDD], + [atomic_load_add_i64_release, XFADDD], + [atomic_load_add_i64_acq_rel, XFADDD], + [atomic_load_add_i64_seq_cst, XFADDD], + ] in { + def : Pat<(P[0] ADDRri:$addr, GPR:$val), (P[1] ADDRri:$addr, GPR:$val)>; + } +} + +foreach P = [[atomic_load_sub_i64_monotonic, XADDD], + [atomic_load_sub_i64_acquire, XFADDD], + [atomic_load_sub_i64_release, XFADDD], + [atomic_load_sub_i64_acq_rel, XFADDD], + [atomic_load_sub_i64_seq_cst, XFADDD], + ] in { + def : Pat<(P[0] ADDRri:$addr, GPR:$val), (P[1] ADDRri:$addr, (NEG_64 GPR:$val))>; +} + +// Borrow the idea from X86InstrFragments.td +class binop_no_use + : PatFrag<(ops node:$A, node:$B), + (operator node:$A, node:$B), + [{ return SDValue(N, 0).use_empty(); }]>; + +class binop_has_use + : PatFrag<(ops node:$A, node:$B), + (operator node:$A, node:$B), + [{ return !SDValue(N, 0).use_empty(); }]>; + +foreach op = [add, and, or, xor] in { +def atomic_load_ # op # _i64_monotonic_nu: + binop_no_use ("atomic_load_"#op# _i64_monotonic)>; +def atomic_load_ # op # _i64_monotonic_hu: + binop_has_use("atomic_load_"#op# _i64_monotonic)>; +} + +foreach P = [// and + [atomic_load_and_i64_monotonic_nu, XANDD], + [atomic_load_and_i64_monotonic_hu, XFANDD], + [atomic_load_and_i64_acquire, XFANDD], + [atomic_load_and_i64_release, XFANDD], + [atomic_load_and_i64_acq_rel, XFANDD], + [atomic_load_and_i64_seq_cst, XFANDD], + // or + [atomic_load_or_i64_monotonic_nu, XORD], + [atomic_load_or_i64_monotonic_hu, XFORD], + [atomic_load_or_i64_acquire, XFORD], + [atomic_load_or_i64_release, XFORD], + [atomic_load_or_i64_acq_rel, XFORD], + [atomic_load_or_i64_seq_cst, XFORD], + // xor + [atomic_load_xor_i64_monotonic_nu, XXORD], + [atomic_load_xor_i64_monotonic_hu, XFXORD], + [atomic_load_xor_i64_acquire, XFXORD], + [atomic_load_xor_i64_release, XFXORD], + [atomic_load_xor_i64_acq_rel, XFXORD], + [atomic_load_xor_i64_seq_cst, XFXORD], + ] in { + def : Pat<(P[0] ADDRri:$addr, GPR:$val), (P[1] ADDRri:$addr, GPR:$val)>; +} // Atomic Exchange class XCHG diff --git a/llvm/lib/Target/BPF/BPFMIChecking.cpp b/llvm/lib/Target/BPF/BPFMIChecking.cpp index 24224f6c1e9e6..09635dbba1760 100644 --- a/llvm/lib/Target/BPF/BPFMIChecking.cpp +++ b/llvm/lib/Target/BPF/BPFMIChecking.cpp @@ -118,7 +118,7 @@ static bool hasLiveDefs(const MachineInstr &MI, const TargetRegisterInfo *TRI) { RegIsGPR64 = GPR64RegClass->contains(MO.getReg()); if (!MO.isDead()) { - // It is a GPR64 live Def, we are sure it is live. */ + // It is a GPR64 live Def, we are sure it is live. if (RegIsGPR64) return true; // It is a GPR32 live Def, we are unsure whether it is really dead due to @@ -153,6 +153,10 @@ static bool hasLiveDefs(const MachineInstr &MI, const TargetRegisterInfo *TRI) { } void BPFMIPreEmitChecking::processAtomicInsts() { + if (MF->getSubtarget().getHasJmp32()) + return; + + // Only check for cpu version 1 and 2. 
for (MachineBasicBlock &MBB : *MF) { for (MachineInstr &MI : MBB) { if (MI.getOpcode() != BPF::XADDW && MI.getOpcode() != BPF::XADDD) diff --git a/llvm/lib/Target/BPF/BTFDebug.cpp b/llvm/lib/Target/BPF/BTFDebug.cpp index 4d847abea731d..9d6dee13ca97a 100644 --- a/llvm/lib/Target/BPF/BTFDebug.cpp +++ b/llvm/lib/Target/BPF/BTFDebug.cpp @@ -35,6 +35,15 @@ static const char *BTFKindStr[] = { #include "llvm/DebugInfo/BTF/BTF.def" }; +static const DIType *tryRemoveAtomicType(const DIType *Ty) { + if (!Ty) + return Ty; + auto DerivedTy = dyn_cast(Ty); + if (DerivedTy && DerivedTy->getTag() == dwarf::DW_TAG_atomic_type) + return DerivedTy->getBaseType(); + return Ty; +} + /// Emit a BTF common type. void BTFTypeBase::emitType(MCStreamer &OS) { OS.AddComment(std::string(BTFKindStr[Kind]) + "(id = " + std::to_string(Id) + @@ -90,7 +99,7 @@ void BTFTypeDerived::completeType(BTFDebug &BDebug) { return; // The base type for PTR/CONST/VOLATILE could be void. - const DIType *ResolvedType = DTy->getBaseType(); + const DIType *ResolvedType = tryRemoveAtomicType(DTy->getBaseType()); if (!ResolvedType) { assert((Kind == BTF::BTF_KIND_PTR || Kind == BTF::BTF_KIND_CONST || Kind == BTF::BTF_KIND_VOLATILE) && @@ -305,7 +314,7 @@ void BTFTypeStruct::completeType(BTFDebug &BDebug) { } else { BTFMember.Offset = DDTy->getOffsetInBits(); } - const auto *BaseTy = DDTy->getBaseType(); + const auto *BaseTy = tryRemoveAtomicType(DDTy->getBaseType()); BTFMember.Type = BDebug.getTypeId(BaseTy); Members.push_back(BTFMember); } @@ -342,7 +351,7 @@ void BTFTypeFuncProto::completeType(BTFDebug &BDebug) { IsCompleted = true; DITypeRefArray Elements = STy->getTypeArray(); - auto RetType = Elements[0]; + auto RetType = tryRemoveAtomicType(Elements[0]); BTFType.Type = RetType ? BDebug.getTypeId(RetType) : 0; BTFType.NameOff = 0; @@ -350,7 +359,7 @@ void BTFTypeFuncProto::completeType(BTFDebug &BDebug) { // to represent the vararg, encode the NameOff/Type to be 0. for (unsigned I = 1, N = Elements.size(); I < N; ++I) { struct BTF::BTFParam Param; - auto Element = Elements[I]; + auto Element = tryRemoveAtomicType(Elements[I]); if (Element) { Param.NameOff = BDebug.addString(FuncArgNames[I]); Param.Type = BDebug.getTypeId(Element); @@ -483,7 +492,7 @@ void BTFTypeTypeTag::completeType(BTFDebug &BDebug) { IsCompleted = true; BTFType.NameOff = BDebug.addString(Tag); if (DTy) { - const DIType *ResolvedType = DTy->getBaseType(); + const DIType *ResolvedType = tryRemoveAtomicType(DTy->getBaseType()); if (!ResolvedType) BTFType.Type = 0; else @@ -800,6 +809,10 @@ void BTFDebug::visitDerivedType(const DIDerivedType *DTy, uint32_t &TypeId, bool CheckPointer, bool SeenPointer) { unsigned Tag = DTy->getTag(); + if (Tag == dwarf::DW_TAG_atomic_type) + return visitTypeEntry(DTy->getBaseType(), TypeId, CheckPointer, + SeenPointer); + /// Try to avoid chasing pointees, esp. structure pointees which may /// unnecessary bring in a lot of types. 
if (CheckPointer && !SeenPointer) { @@ -1444,8 +1457,10 @@ void BTFDebug::processGlobals(bool ProcessingMapDef) { DIGlobal = GVE->getVariable(); if (SecName.starts_with(".maps")) visitMapDefType(DIGlobal->getType(), GVTypeId); - else - visitTypeEntry(DIGlobal->getType(), GVTypeId, false, false); + else { + const DIType *Ty = tryRemoveAtomicType(DIGlobal->getType()); + visitTypeEntry(Ty, GVTypeId, false, false); + } break; } diff --git a/llvm/test/CodeGen/BPF/BTF/atomics.ll b/llvm/test/CodeGen/BPF/BTF/atomics.ll new file mode 100644 index 0000000000000..2c02110f24c0d --- /dev/null +++ b/llvm/test/CodeGen/BPF/BTF/atomics.ll @@ -0,0 +1,151 @@ +; RUN: llc -march=bpfel -mcpu=v3 -filetype=obj -o %t1 %s +; RUN: llvm-objcopy --dump-section='.BTF'=%t2 %t1 +; RUN: %python %p/print_btf.py %t2 | FileCheck -check-prefixes=CHECK %s +; +; Source: +; #include +; struct gstruct_t { +; _Atomic int a; +; } gstruct; +; extern _Atomic int ext; +; _Atomic int gbl; +; _Atomic int *pgbl; +; volatile _Atomic int vvar; +; _Atomic int __attribute__((btf_type_tag("foo"))) *tagptr1; +; volatile __attribute__((btf_type_tag("foo"))) _Atomic int *tagptr2; +; _Atomic int foo(_Atomic int a1, _Atomic int *p1) { +; (void)__c11_atomic_fetch_add(&gstruct.a, 1, memory_order_relaxed); +; (void)__c11_atomic_fetch_add(&ext, 1, memory_order_relaxed); +; (void)__c11_atomic_fetch_add(&gbl, 1, memory_order_relaxed); +; (void)__c11_atomic_fetch_add(pgbl, 1, memory_order_relaxed); +; (void)__c11_atomic_fetch_add(&vvar, 1, memory_order_relaxed); +; (void)__c11_atomic_fetch_add(p1, 1, memory_order_relaxed); +; +; return a1; +; } + +target triple = "bpf" + +%struct.gstruct_t = type { i32 } + +@gstruct = dso_local global %struct.gstruct_t zeroinitializer, align 4, !dbg !0 +@ext = external dso_local global i32, align 4, !dbg !34 +@gbl = dso_local global i32 0, align 4, !dbg !16 +@pgbl = dso_local local_unnamed_addr global ptr null, align 8, !dbg !20 +@vvar = dso_local global i32 0, align 4, !dbg !23 +@tagptr1 = dso_local local_unnamed_addr global ptr null, align 8, !dbg !26 +@tagptr2 = dso_local local_unnamed_addr global ptr null, align 8, !dbg !31 + +; Function Attrs: mustprogress nofree norecurse nounwind willreturn +define dso_local i32 @foo(i32 returned %a1, ptr nocapture noundef %p1) local_unnamed_addr #0 !dbg !45 { +entry: + #dbg_value(i32 %a1, !49, !DIExpression(), !51) + #dbg_value(ptr %p1, !50, !DIExpression(), !51) + %0 = atomicrmw add ptr @gstruct, i32 1 monotonic, align 4, !dbg !52 + %1 = atomicrmw add ptr @ext, i32 1 monotonic, align 4, !dbg !53 + %2 = atomicrmw add ptr @gbl, i32 1 monotonic, align 4, !dbg !54 + %3 = load ptr, ptr @pgbl, align 8, !dbg !55, !tbaa !56 + %4 = atomicrmw add ptr %3, i32 1 monotonic, align 4, !dbg !60 + %5 = atomicrmw volatile add ptr @vvar, i32 1 monotonic, align 4, !dbg !61 + %6 = atomicrmw add ptr %p1, i32 1 monotonic, align 4, !dbg !62 + ret i32 %a1, !dbg !63 +} + +; CHECK: [1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED +; CHECK-NEXT: [2] PTR '(anon)' type_id=1 +; CHECK-NEXT: [3] FUNC_PROTO '(anon)' ret_type_id=1 vlen=2 +; CHECK-NEXT: 'a1' type_id=1 +; CHECK-NEXT: 'p1' type_id=2 +; CHECK-NEXT: [4] FUNC 'foo' type_id=3 linkage=global +; CHECK-NEXT: [5] STRUCT 'gstruct_t' size=4 vlen=1 +; CHECK-NEXT: 'a' type_id=1 bits_offset=0 +; CHECK-NEXT: [6] VAR 'gstruct' type_id=5, linkage=global +; CHECK-NEXT: [7] VAR 'ext' type_id=1, linkage=extern +; CHECK-NEXT: [8] VAR 'gbl' type_id=1, linkage=global +; CHECK-NEXT: [9] VAR 'pgbl' type_id=2, linkage=global +; CHECK-NEXT: [10] VOLATILE 
'(anon)' type_id=1 +; CHECK-NEXT: [11] VAR 'vvar' type_id=10, linkage=global +; CHECK-NEXT: [12] TYPE_TAG 'foo' type_id=1 +; CHECK-NEXT: [13] PTR '(anon)' type_id=12 +; CHECK-NEXT: [14] VAR 'tagptr1' type_id=13, linkage=global +; CHECK-NEXT: [15] TYPE_TAG 'foo' type_id=10 +; CHECK-NEXT: [16] PTR '(anon)' type_id=15 +; CHECK-NEXT: [17] VAR 'tagptr2' type_id=16, linkage=global +; CHECK-NEXT: [18] DATASEC '.bss' size=0 vlen=6 +; CHECK-NEXT: type_id=6 offset=0 size=4 +; CHECK-NEXT: type_id=8 offset=0 size=4 +; CHECK-NEXT: type_id=9 offset=0 size=8 +; CHECK-NEXT: type_id=11 offset=0 size=4 +; CHECK-NEXT: type_id=14 offset=0 size=8 +; CHECK-NEXT: type_id=17 offset=0 size=8 + +attributes #0 = { mustprogress nofree norecurse nounwind willreturn "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } + +!llvm.dbg.cu = !{!2} +!llvm.module.flags = !{!39, !40, !41, !42, !43} +!llvm.ident = !{!44} + +!0 = !DIGlobalVariableExpression(var: !1, expr: !DIExpression()) +!1 = distinct !DIGlobalVariable(name: "gstruct", scope: !2, file: !3, line: 4, type: !36, isLocal: false, isDefinition: true) +!2 = distinct !DICompileUnit(language: DW_LANG_C11, file: !3, producer: "clang version 20.0.0git (git@github.com:yonghong-song/llvm-project.git 96b5b6e527c024bea84f07ea11d4b3ff63468c22)", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !4, globals: !15, splitDebugInlining: false, nameTableKind: None) +!3 = !DIFile(filename: "test6.c", directory: "/tmp/home/yhs/tmp3", checksumkind: CSK_MD5, checksum: "e743f2985da6027dcc5e048bd1dcccca") +!4 = !{!5} +!5 = !DICompositeType(tag: DW_TAG_enumeration_type, name: "memory_order", file: !6, line: 68, baseType: !7, size: 32, elements: !8) +!6 = !DIFile(filename: "work/yhs/llvm-project/llvm/build/install/lib/clang/20/include/stdatomic.h", directory: "/home/yhs", checksumkind: CSK_MD5, checksum: "f17199a988fe91afffaf0f943ef87096") +!7 = !DIBasicType(name: "unsigned int", size: 32, encoding: DW_ATE_unsigned) +!8 = !{!9, !10, !11, !12, !13, !14} +!9 = !DIEnumerator(name: "memory_order_relaxed", value: 0) +!10 = !DIEnumerator(name: "memory_order_consume", value: 1) +!11 = !DIEnumerator(name: "memory_order_acquire", value: 2) +!12 = !DIEnumerator(name: "memory_order_release", value: 3) +!13 = !DIEnumerator(name: "memory_order_acq_rel", value: 4) +!14 = !DIEnumerator(name: "memory_order_seq_cst", value: 5) +!15 = !{!0, !16, !20, !23, !26, !31, !34} +!16 = !DIGlobalVariableExpression(var: !17, expr: !DIExpression()) +!17 = distinct !DIGlobalVariable(name: "gbl", scope: !2, file: !3, line: 6, type: !18, isLocal: false, isDefinition: true) +!18 = !DIDerivedType(tag: DW_TAG_atomic_type, baseType: !19) +!19 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) +!20 = !DIGlobalVariableExpression(var: !21, expr: !DIExpression()) +!21 = distinct !DIGlobalVariable(name: "pgbl", scope: !2, file: !3, line: 7, type: !22, isLocal: false, isDefinition: true) +!22 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !18, size: 64) +!23 = !DIGlobalVariableExpression(var: !24, expr: !DIExpression()) +!24 = distinct !DIGlobalVariable(name: "vvar", scope: !2, file: !3, line: 8, type: !25, isLocal: false, isDefinition: true) +!25 = !DIDerivedType(tag: DW_TAG_volatile_type, baseType: !18) +!26 = !DIGlobalVariableExpression(var: !27, expr: !DIExpression()) +!27 = distinct !DIGlobalVariable(name: "tagptr1", scope: !2, file: !3, line: 9, type: !28, isLocal: false, isDefinition: true) +!28 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !18, size: 
64, annotations: !29) +!29 = !{!30} +!30 = !{!"btf_type_tag", !"foo"} +!31 = !DIGlobalVariableExpression(var: !32, expr: !DIExpression()) +!32 = distinct !DIGlobalVariable(name: "tagptr2", scope: !2, file: !3, line: 10, type: !33, isLocal: false, isDefinition: true) +!33 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !25, size: 64, annotations: !29) +!34 = !DIGlobalVariableExpression(var: !35, expr: !DIExpression()) +!35 = distinct !DIGlobalVariable(name: "ext", scope: !2, file: !3, line: 5, type: !18, isLocal: false, isDefinition: false) +!36 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "gstruct_t", file: !3, line: 2, size: 32, elements: !37) +!37 = !{!38} +!38 = !DIDerivedType(tag: DW_TAG_member, name: "a", scope: !36, file: !3, line: 3, baseType: !18, size: 32) +!39 = !{i32 7, !"Dwarf Version", i32 5} +!40 = !{i32 2, !"Debug Info Version", i32 3} +!41 = !{i32 1, !"wchar_size", i32 4} +!42 = !{i32 7, !"frame-pointer", i32 2} +!43 = !{i32 7, !"debug-info-assignment-tracking", i1 true} +!44 = !{!"clang version 20.0.0git (git@github.com:yonghong-song/llvm-project.git 96b5b6e527c024bea84f07ea11d4b3ff63468c22)"} +!45 = distinct !DISubprogram(name: "foo", scope: !3, file: !3, line: 11, type: !46, scopeLine: 11, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !48) +!46 = !DISubroutineType(types: !47) +!47 = !{!18, !18, !22} +!48 = !{!49, !50} +!49 = !DILocalVariable(name: "a1", arg: 1, scope: !45, file: !3, line: 11, type: !18) +!50 = !DILocalVariable(name: "p1", arg: 2, scope: !45, file: !3, line: 11, type: !22) +!51 = !DILocation(line: 0, scope: !45) +!52 = !DILocation(line: 12, column: 9, scope: !45) +!53 = !DILocation(line: 13, column: 9, scope: !45) +!54 = !DILocation(line: 14, column: 9, scope: !45) +!55 = !DILocation(line: 15, column: 32, scope: !45) +!56 = !{!57, !57, i64 0} +!57 = !{!"any pointer", !58, i64 0} +!58 = !{!"omnipotent char", !59, i64 0} +!59 = !{!"Simple C/C++ TBAA"} +!60 = !DILocation(line: 15, column: 9, scope: !45) +!61 = !DILocation(line: 16, column: 9, scope: !45) +!62 = !DILocation(line: 17, column: 9, scope: !45) +!63 = !DILocation(line: 19, column: 3, scope: !45) diff --git a/llvm/test/CodeGen/BPF/BTF/print_btf.py b/llvm/test/CodeGen/BPF/BTF/print_btf.py new file mode 100644 index 0000000000000..6ce08b76c363e --- /dev/null +++ b/llvm/test/CodeGen/BPF/BTF/print_btf.py @@ -0,0 +1,295 @@ +#!/usr/bin/env python3 + +# Ad-hoc script to print BTF file in a readable format. +# Follows the same printing conventions as bpftool with format 'raw'. +# Usage: +# +# ./print_btf.py +# +# Parameters: +# +# :: a file name or '-' to read from stdin. 
+# +# Intended usage: +# +# llvm-objcopy --dump-section .BTF=- | ./print_btf.py - +# +# Kernel documentation contains detailed format description: +# https://www.kernel.org/doc/html/latest/bpf/btf.html + +import struct +import ctypes +import sys + + +class SafeDict(dict): + def __getitem__(self, key): + try: + return dict.__getitem__(self, key) + except KeyError: + return f"" + + +KINDS = SafeDict( + { + 0: "UNKN", + 1: "INT", + 2: "PTR", + 3: "ARRAY", + 4: "STRUCT", + 5: "UNION", + 6: "ENUM", + 7: "FWD", + 8: "TYPEDEF", + 9: "VOLATILE", + 10: "CONST", + 11: "RESTRICT", + 12: "FUNC", + 13: "FUNC_PROTO", + 14: "VAR", + 15: "DATASEC", + 16: "FLOAT", + 17: "DECL_TAG", + 18: "TYPE_TAG", + 19: "ENUM64", + } +) + +INT_ENCODING = SafeDict( + {0 << 0: "(none)", 1 << 0: "SIGNED", 1 << 1: "CHAR", 1 << 2: "BOOL"} +) + +ENUM_ENCODING = SafeDict({0: "UNSIGNED", 1: "SIGNED"}) + +FUNC_LINKAGE = SafeDict({0: "static", 1: "global", 2: "extern"}) + +VAR_LINKAGE = SafeDict({0: "static", 1: "global", 2: "extern"}) + +FWD_KIND = SafeDict( + { + 0: "struct", + 1: "union", + } +) + +for val, name in KINDS.items(): + globals()["BTF_KIND_" + name] = val + + +def warn(message): + print(message, file=sys.stderr) + + +def print_btf(filename): + if filename == "-": + buf = sys.stdin.buffer.read() + else: + with open(filename, "rb") as file: + buf = file.read() + + fmt_cache = {} + endian_pfx = "" + off = 0 + + def unpack(fmt): + nonlocal off, endian_pfx + fmt = endian_pfx + fmt + if fmt not in fmt_cache: + fmt_cache[fmt] = struct.Struct(fmt) + st = fmt_cache[fmt] + r = st.unpack_from(buf, off) + off += st.size + return r + + # Use magic number at the header start to determine endianness + (magic,) = unpack("H") + if magic == 0xEB9F: + endian_pfx = "<" + elif magic == 0x9FEB: + endian_pfx = ">" + else: + warn(f"Unexpected BTF magic: {magic:02x}") + return + + # Rest of the header + version, flags, hdr_len = unpack("BBI") + type_off, type_len, str_off, str_len = unpack("IIII") + + # Offsets in the header are relative to the end of a header + type_off += hdr_len + str_off += hdr_len + off = hdr_len + type_end = type_off + type_len + + def string(rel_off): + try: + start = str_off + rel_off + end = buf.index(b"\0", start) + if start == end: + return "(anon)" + return buf[start:end].decode("utf8") + except ValueError as e: + warn(f"Can't get string at offset {str_off} + {rel_off}: {e}") + return f"" + + idx = 1 + while off < type_end: + name_off, info, size = unpack("III") + kind = (info >> 24) & 0x1F + vlen = info & 0xFFFF + kflag = info >> 31 + kind_name = KINDS[kind] + name = string(name_off) + + def warn_nonzero(val, name): + nonlocal idx + if val != 0: + warn(f"<{idx}> {name} should be 0 but is {val}") + + if kind == BTF_KIND_INT: + (info,) = unpack("I") + encoding = (info & 0x0F000000) >> 24 + offset = (info & 0x00FF0000) >> 16 + bits = info & 0x000000FF + enc_name = INT_ENCODING[encoding] + print( + f"[{idx}] {kind_name} '{name}' size={size} " + f"bits_offset={offset} " + f"nr_bits={bits} encoding={enc_name}" + ) + warn_nonzero(kflag, "kflag") + warn_nonzero(vlen, "vlen") + + elif kind in [ + BTF_KIND_PTR, + BTF_KIND_CONST, + BTF_KIND_VOLATILE, + BTF_KIND_RESTRICT, + ]: + print(f"[{idx}] {kind_name} '{name}' type_id={size}") + warn_nonzero(name_off, "name_off") + warn_nonzero(kflag, "kflag") + warn_nonzero(vlen, "vlen") + + elif kind == BTF_KIND_ARRAY: + warn_nonzero(name_off, "name_off") + warn_nonzero(kflag, "kflag") + warn_nonzero(vlen, "vlen") + warn_nonzero(size, "size") + type, index_type, nelems = 
unpack("III") + print( + f"[{idx}] {kind_name} '{name}' type_id={type} " + f"index_type_id={index_type} nr_elems={nelems}" + ) + + elif kind in [BTF_KIND_STRUCT, BTF_KIND_UNION]: + print(f"[{idx}] {kind_name} '{name}' size={size} vlen={vlen}") + if kflag not in [0, 1]: + warn(f"<{idx}> kflag should 0 or 1: {kflag}") + for _ in range(0, vlen): + name_off, type, offset = unpack("III") + if kflag == 0: + print( + f"\t'{string(name_off)}' type_id={type} " + f"bits_offset={offset}" + ) + else: + bits_offset = offset & 0xFFFFFF + bitfield_size = offset >> 24 + print( + f"\t'{string(name_off)}' type_id={type} " + f"bits_offset={bits_offset} " + f"bitfield_size={bitfield_size}" + ) + + elif kind == BTF_KIND_ENUM: + encoding = ENUM_ENCODING[kflag] + print( + f"[{idx}] {kind_name} '{name}' encoding={encoding} " + f"size={size} vlen={vlen}" + ) + for _ in range(0, vlen): + (name_off,) = unpack("I") + (val,) = unpack("i" if kflag == 1 else "I") + print(f"\t'{string(name_off)}' val={val}") + + elif kind == BTF_KIND_ENUM64: + encoding = ENUM_ENCODING[kflag] + print( + f"[{idx}] {kind_name} '{name}' encoding={encoding} " + f"size={size} vlen={vlen}" + ) + for _ in range(0, vlen): + name_off, lo, hi = unpack("III") + val = hi << 32 | lo + if kflag == 1: + val = ctypes.c_long(val).value + print(f"\t'{string(name_off)}' val={val}LL") + + elif kind == BTF_KIND_FWD: + print(f"[{idx}] {kind_name} '{name}' fwd_kind={FWD_KIND[kflag]}") + warn_nonzero(vlen, "vlen") + warn_nonzero(size, "size") + + elif kind in [BTF_KIND_TYPEDEF, BTF_KIND_TYPE_TAG]: + print(f"[{idx}] {kind_name} '{name}' type_id={size}") + warn_nonzero(kflag, "kflag") + warn_nonzero(kflag, "vlen") + + elif kind == BTF_KIND_FUNC: + linkage = FUNC_LINKAGE[vlen] + print(f"[{idx}] {kind_name} '{name}' type_id={size} " f"linkage={linkage}") + warn_nonzero(kflag, "kflag") + + elif kind == BTF_KIND_FUNC_PROTO: + print(f"[{idx}] {kind_name} '{name}' ret_type_id={size} " f"vlen={vlen}") + warn_nonzero(name_off, "name_off") + warn_nonzero(kflag, "kflag") + for _ in range(0, vlen): + name_off, type = unpack("II") + print(f"\t'{string(name_off)}' type_id={type}") + + elif kind == BTF_KIND_VAR: + (linkage,) = unpack("I") + linkage = VAR_LINKAGE[linkage] + print(f"[{idx}] {kind_name} '{name}' type_id={size}, " f"linkage={linkage}") + warn_nonzero(kflag, "kflag") + warn_nonzero(vlen, "vlen") + + elif kind == BTF_KIND_DATASEC: + print(f"[{idx}] {kind_name} '{name}' size={size} vlen={vlen}") + warn_nonzero(kflag, "kflag") + warn_nonzero(size, "size") + for _ in range(0, vlen): + type, offset, size = unpack("III") + print(f"\ttype_id={type} offset={offset} size={size}") + + elif kind == BTF_KIND_FLOAT: + print(f"[{idx}] {kind_name} '{name}' size={size}") + warn_nonzero(kflag, "kflag") + warn_nonzero(vlen, "vlen") + + elif kind == BTF_KIND_DECL_TAG: + (component_idx,) = unpack("i") + print( + f"[{idx}] {kind_name} '{name}' type_id={size} " + + f"component_idx={component_idx}" + ) + warn_nonzero(kflag, "kflag") + warn_nonzero(vlen, "vlen") + + else: + warn( + f"<{idx}> Unexpected entry: kind={kind_name} " + f"name_off={name_off} " + f"vlen={vlen} kflag={kflag} size={size}" + ) + + idx += 1 + + +if __name__ == "__main__": + if len(sys.argv) != 2: + warn("Usage: {sys.argv[0]} ") + sys.exit(1) + print_btf(sys.argv[1]) diff --git a/llvm/test/CodeGen/BPF/atomics_mem_order_v1.ll b/llvm/test/CodeGen/BPF/atomics_mem_order_v1.ll new file mode 100644 index 0000000000000..31081586bf7af --- /dev/null +++ b/llvm/test/CodeGen/BPF/atomics_mem_order_v1.ll @@ -0,0 +1,385 @@ +; NOTE: 
Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -march=bpfel -mcpu=v1 -filetype=asm < %s | FileCheck %s +; +; Source: +; $ cat atomics_mem_order_v1.c +; #include +; +; void test_fetch_add_32_noret(int _Atomic *i) { +; (void)__c11_atomic_fetch_add(i, 10, memory_order_relaxed); +; (void)__c11_atomic_fetch_add(i, 10, memory_order_acquire); +; (void)__c11_atomic_fetch_add(i, 10, memory_order_release); +; (void)__c11_atomic_fetch_add(i, 10, memory_order_acq_rel); +; (void)__c11_atomic_fetch_add(i, 10, memory_order_seq_cst); +; } +; +; void test_fetch_add_64_noret(long _Atomic *i) { +; (void)__c11_atomic_fetch_add(i, 10, memory_order_relaxed); +; (void)__c11_atomic_fetch_add(i, 10, memory_order_acquire); +; (void)__c11_atomic_fetch_add(i, 10, memory_order_release); +; (void)__c11_atomic_fetch_add(i, 10, memory_order_acq_rel); +; (void)__c11_atomic_fetch_add(i, 10, memory_order_seq_cst); +; } +; +; void test_fetch_sub_64_noret(long _Atomic *i) { +; (void)__c11_atomic_fetch_sub(i, 10, memory_order_relaxed); +; (void)__c11_atomic_fetch_sub(i, 10, memory_order_acquire); +; (void)__c11_atomic_fetch_sub(i, 10, memory_order_release); +; (void)__c11_atomic_fetch_sub(i, 10, memory_order_acq_rel); +; (void)__c11_atomic_fetch_sub(i, 10, memory_order_seq_cst); +; } +; +; long test_fetch_sub_64_ret(long _Atomic *i) { +; return __c11_atomic_fetch_sub(i, 10, memory_order_acquire) + +; __c11_atomic_fetch_sub(i, 10, memory_order_release) + +; __c11_atomic_fetch_sub(i, 10, memory_order_acq_rel) + +; __c11_atomic_fetch_sub(i, 10, memory_order_seq_cst); +; } +; +; void test_fetch_and_64_noret(long _Atomic *i) { +; (void)__c11_atomic_fetch_and(i, 10, memory_order_relaxed); +; (void)__c11_atomic_fetch_and(i, 10, memory_order_acquire); +; (void)__c11_atomic_fetch_and(i, 10, memory_order_release); +; (void)__c11_atomic_fetch_and(i, 10, memory_order_acq_rel); +; (void)__c11_atomic_fetch_and(i, 10, memory_order_seq_cst); +; } +; +; long test_fetch_and_64_ret(long _Atomic *i) { +; return __c11_atomic_fetch_and(i, 10, memory_order_relaxed) + +; __c11_atomic_fetch_and(i, 10, memory_order_acquire) + +; __c11_atomic_fetch_and(i, 10, memory_order_release) + +; __c11_atomic_fetch_and(i, 10, memory_order_acq_rel) + +; __c11_atomic_fetch_and(i, 10, memory_order_seq_cst); +; } +; +; void test_fetch_or_64_noret(long _Atomic *i) { +; (void)__c11_atomic_fetch_or(i, 10, memory_order_relaxed); +; (void)__c11_atomic_fetch_or(i, 10, memory_order_acquire); +; (void)__c11_atomic_fetch_or(i, 10, memory_order_release); +; (void)__c11_atomic_fetch_or(i, 10, memory_order_acq_rel); +; (void)__c11_atomic_fetch_or(i, 10, memory_order_seq_cst); +; } +; +; long test_fetch_or_64_ret(long _Atomic *i) { +; return __c11_atomic_fetch_or(i, 10, memory_order_relaxed) + +; __c11_atomic_fetch_or(i, 10, memory_order_acquire) + +; __c11_atomic_fetch_or(i, 10, memory_order_release) + +; __c11_atomic_fetch_or(i, 10, memory_order_acq_rel) + +; __c11_atomic_fetch_or(i, 10, memory_order_seq_cst); +; } +; +; void test_fetch_xor_64_noret(long _Atomic *i) { +; (void)__c11_atomic_fetch_xor(i, 10, memory_order_relaxed); +; (void)__c11_atomic_fetch_xor(i, 10, memory_order_acquire); +; (void)__c11_atomic_fetch_xor(i, 10, memory_order_release); +; (void)__c11_atomic_fetch_xor(i, 10, memory_order_acq_rel); +; (void)__c11_atomic_fetch_xor(i, 10, memory_order_seq_cst); +; } +; +; long test_fetch_xor_64_ret(long _Atomic *i) { +; return __c11_atomic_fetch_xor(i, 10, memory_order_relaxed) + +; __c11_atomic_fetch_xor(i, 10, 
memory_order_acquire) + +; __c11_atomic_fetch_xor(i, 10, memory_order_release) + +; __c11_atomic_fetch_xor(i, 10, memory_order_acq_rel) + +; __c11_atomic_fetch_xor(i, 10, memory_order_seq_cst); +; } + +target triple = "bpf" + +; Function Attrs: mustprogress nofree norecurse nounwind willreturn memory(argmem: readwrite) +define dso_local void @test_fetch_add_32_noret(ptr nocapture noundef %i) local_unnamed_addr #0 { +; CHECK-LABEL: test_fetch_add_32_noret: +; CHECK: .Ltest_fetch_add_32_noret$local: +; CHECK-NEXT: .type .Ltest_fetch_add_32_noret$local,@function +; CHECK-NEXT: # %bb.0: # %entry +; CHECK-NEXT: r2 = 10 +; CHECK-NEXT: r3 = 10 +; CHECK-NEXT: lock *(u32 *)(r1 + 0) += r3 +; CHECK-NEXT: r3 = 10 +; CHECK-NEXT: lock *(u32 *)(r1 + 0) += r3 +; CHECK-NEXT: r3 = 10 +; CHECK-NEXT: lock *(u32 *)(r1 + 0) += r3 +; CHECK-NEXT: r3 = 10 +; CHECK-NEXT: lock *(u32 *)(r1 + 0) += r3 +; CHECK-NEXT: lock *(u32 *)(r1 + 0) += r2 +; CHECK-NEXT: exit +entry: + %0 = atomicrmw add ptr %i, i32 10 monotonic, align 4 + %1 = atomicrmw add ptr %i, i32 10 acquire, align 4 + %2 = atomicrmw add ptr %i, i32 10 release, align 4 + %3 = atomicrmw add ptr %i, i32 10 acq_rel, align 4 + %4 = atomicrmw add ptr %i, i32 10 seq_cst, align 4 + ret void +} + +; Function Attrs: mustprogress nofree norecurse nounwind willreturn memory(argmem: readwrite) +define dso_local void @test_fetch_add_64_noret(ptr nocapture noundef %i) local_unnamed_addr #0 { +; CHECK-LABEL: test_fetch_add_64_noret: +; CHECK: .Ltest_fetch_add_64_noret$local: +; CHECK-NEXT: .type .Ltest_fetch_add_64_noret$local,@function +; CHECK-NEXT: # %bb.0: # %entry +; CHECK-NEXT: r2 = 10 +; CHECK-NEXT: r3 = 10 +; CHECK-NEXT: lock *(u64 *)(r1 + 0) += r3 +; CHECK-NEXT: r3 = 10 +; CHECK-NEXT: lock *(u64 *)(r1 + 0) += r3 +; CHECK-NEXT: r3 = 10 +; CHECK-NEXT: lock *(u64 *)(r1 + 0) += r3 +; CHECK-NEXT: r3 = 10 +; CHECK-NEXT: lock *(u64 *)(r1 + 0) += r3 +; CHECK-NEXT: lock *(u64 *)(r1 + 0) += r2 +; CHECK-NEXT: exit +entry: + %0 = atomicrmw add ptr %i, i64 10 monotonic, align 8 + %1 = atomicrmw add ptr %i, i64 10 acquire, align 8 + %2 = atomicrmw add ptr %i, i64 10 release, align 8 + %3 = atomicrmw add ptr %i, i64 10 acq_rel, align 8 + %4 = atomicrmw add ptr %i, i64 10 seq_cst, align 8 + ret void +} + +; Function Attrs: mustprogress nofree norecurse nounwind willreturn memory(argmem: readwrite) +define dso_local void @test_fetch_sub_64_noret(ptr nocapture noundef %i) local_unnamed_addr #0 { +; CHECK-LABEL: test_fetch_sub_64_noret: +; CHECK: .Ltest_fetch_sub_64_noret$local: +; CHECK-NEXT: .type .Ltest_fetch_sub_64_noret$local,@function +; CHECK-NEXT: # %bb.0: # %entry +; CHECK-NEXT: r2 = 10 +; CHECK-NEXT: r2 = -r2 +; CHECK-NEXT: r3 = r2 +; CHECK-NEXT: lock *(u64 *)(r1 + 0) += r3 +; CHECK-NEXT: r3 = r2 +; CHECK-NEXT: r3 = atomic_fetch_add((u64 *)(r1 + 0), r3) +; CHECK-NEXT: r3 = r2 +; CHECK-NEXT: r3 = atomic_fetch_add((u64 *)(r1 + 0), r3) +; CHECK-NEXT: r3 = r2 +; CHECK-NEXT: r3 = atomic_fetch_add((u64 *)(r1 + 0), r3) +; CHECK-NEXT: r2 = atomic_fetch_add((u64 *)(r1 + 0), r2) +; CHECK-NEXT: exit +entry: + %0 = atomicrmw sub ptr %i, i64 10 monotonic, align 8 + %1 = atomicrmw sub ptr %i, i64 10 acquire, align 8 + %2 = atomicrmw sub ptr %i, i64 10 release, align 8 + %3 = atomicrmw sub ptr %i, i64 10 acq_rel, align 8 + %4 = atomicrmw sub ptr %i, i64 10 seq_cst, align 8 + ret void +} + +; Function Attrs: mustprogress nofree norecurse nounwind willreturn memory(argmem: readwrite) +define dso_local i64 @test_fetch_sub_64_ret(ptr nocapture noundef %i) local_unnamed_addr #0 { +; 
CHECK-LABEL: test_fetch_sub_64_ret: +; CHECK: .Ltest_fetch_sub_64_ret$local: +; CHECK-NEXT: .type .Ltest_fetch_sub_64_ret$local,@function +; CHECK-NEXT: # %bb.0: # %entry +; CHECK-NEXT: r2 = 10 +; CHECK-NEXT: r2 = -r2 +; CHECK-NEXT: r3 = r2 +; CHECK-NEXT: r3 = atomic_fetch_add((u64 *)(r1 + 0), r3) +; CHECK-NEXT: r0 = r2 +; CHECK-NEXT: r0 = atomic_fetch_add((u64 *)(r1 + 0), r0) +; CHECK-NEXT: r0 += r3 +; CHECK-NEXT: r3 = r2 +; CHECK-NEXT: r3 = atomic_fetch_add((u64 *)(r1 + 0), r3) +; CHECK-NEXT: r0 += r3 +; CHECK-NEXT: r2 = atomic_fetch_add((u64 *)(r1 + 0), r2) +; CHECK-NEXT: r0 += r2 +; CHECK-NEXT: exit +entry: + %0 = atomicrmw sub ptr %i, i64 10 acquire, align 8 + %1 = atomicrmw sub ptr %i, i64 10 release, align 8 + %add = add nsw i64 %1, %0 + %2 = atomicrmw sub ptr %i, i64 10 acq_rel, align 8 + %add5 = add nsw i64 %add, %2 + %3 = atomicrmw sub ptr %i, i64 10 seq_cst, align 8 + %add8 = add nsw i64 %add5, %3 + ret i64 %add8 +} + +; Function Attrs: mustprogress nofree norecurse nounwind willreturn memory(argmem: readwrite) +define dso_local void @test_fetch_and_64_noret(ptr nocapture noundef %i) local_unnamed_addr #0 { +; CHECK-LABEL: test_fetch_and_64_noret: +; CHECK: .Ltest_fetch_and_64_noret$local: +; CHECK-NEXT: .type .Ltest_fetch_and_64_noret$local,@function +; CHECK-NEXT: # %bb.0: # %entry +; CHECK-NEXT: r2 = 10 +; CHECK-NEXT: r3 = 10 +; CHECK-NEXT: lock *(u64 *)(r1 + 0) &= r3 +; CHECK-NEXT: r3 = 10 +; CHECK-NEXT: r3 = atomic_fetch_and((u64 *)(r1 + 0), r3) +; CHECK-NEXT: r3 = 10 +; CHECK-NEXT: r3 = atomic_fetch_and((u64 *)(r1 + 0), r3) +; CHECK-NEXT: r3 = 10 +; CHECK-NEXT: r3 = atomic_fetch_and((u64 *)(r1 + 0), r3) +; CHECK-NEXT: r2 = atomic_fetch_and((u64 *)(r1 + 0), r2) +; CHECK-NEXT: exit +entry: + %0 = atomicrmw and ptr %i, i64 10 monotonic, align 8 + %1 = atomicrmw and ptr %i, i64 10 acquire, align 8 + %2 = atomicrmw and ptr %i, i64 10 release, align 8 + %3 = atomicrmw and ptr %i, i64 10 acq_rel, align 8 + %4 = atomicrmw and ptr %i, i64 10 seq_cst, align 8 + ret void +} + +; Function Attrs: mustprogress nofree norecurse nounwind willreturn memory(argmem: readwrite) +define dso_local i64 @test_fetch_and_64_ret(ptr nocapture noundef %i) local_unnamed_addr #0 { +; CHECK-LABEL: test_fetch_and_64_ret: +; CHECK: .Ltest_fetch_and_64_ret$local: +; CHECK-NEXT: .type .Ltest_fetch_and_64_ret$local,@function +; CHECK-NEXT: # %bb.0: # %entry +; CHECK-NEXT: r2 = 10 +; CHECK-NEXT: r3 = 10 +; CHECK-NEXT: r3 = atomic_fetch_and((u64 *)(r1 + 0), r3) +; CHECK-NEXT: r0 = 10 +; CHECK-NEXT: r0 = atomic_fetch_and((u64 *)(r1 + 0), r0) +; CHECK-NEXT: r0 += r3 +; CHECK-NEXT: r3 = 10 +; CHECK-NEXT: r3 = atomic_fetch_and((u64 *)(r1 + 0), r3) +; CHECK-NEXT: r0 += r3 +; CHECK-NEXT: r3 = 10 +; CHECK-NEXT: r3 = atomic_fetch_and((u64 *)(r1 + 0), r3) +; CHECK-NEXT: r0 += r3 +; CHECK-NEXT: r2 = atomic_fetch_and((u64 *)(r1 + 0), r2) +; CHECK-NEXT: r0 += r2 +; CHECK-NEXT: exit +entry: + %0 = atomicrmw and ptr %i, i64 10 monotonic, align 8 + %1 = atomicrmw and ptr %i, i64 10 acquire, align 8 + %add = add nsw i64 %1, %0 + %2 = atomicrmw and ptr %i, i64 10 release, align 8 + %add5 = add nsw i64 %add, %2 + %3 = atomicrmw and ptr %i, i64 10 acq_rel, align 8 + %add8 = add nsw i64 %add5, %3 + %4 = atomicrmw and ptr %i, i64 10 seq_cst, align 8 + %add11 = add nsw i64 %add8, %4 + ret i64 %add11 +} + +; Function Attrs: mustprogress nofree norecurse nounwind willreturn memory(argmem: readwrite) +define dso_local void @test_fetch_or_64_noret(ptr nocapture noundef %i) local_unnamed_addr #0 { +; CHECK-LABEL: test_fetch_or_64_noret: 
+; CHECK: .Ltest_fetch_or_64_noret$local: +; CHECK-NEXT: .type .Ltest_fetch_or_64_noret$local,@function +; CHECK-NEXT: # %bb.0: # %entry +; CHECK-NEXT: r2 = 10 +; CHECK-NEXT: r3 = 10 +; CHECK-NEXT: lock *(u64 *)(r1 + 0) |= r3 +; CHECK-NEXT: r3 = 10 +; CHECK-NEXT: r3 = atomic_fetch_or((u64 *)(r1 + 0), r3) +; CHECK-NEXT: r3 = 10 +; CHECK-NEXT: r3 = atomic_fetch_or((u64 *)(r1 + 0), r3) +; CHECK-NEXT: r3 = 10 +; CHECK-NEXT: r3 = atomic_fetch_or((u64 *)(r1 + 0), r3) +; CHECK-NEXT: r2 = atomic_fetch_or((u64 *)(r1 + 0), r2) +; CHECK-NEXT: exit +entry: + %0 = atomicrmw or ptr %i, i64 10 monotonic, align 8 + %1 = atomicrmw or ptr %i, i64 10 acquire, align 8 + %2 = atomicrmw or ptr %i, i64 10 release, align 8 + %3 = atomicrmw or ptr %i, i64 10 acq_rel, align 8 + %4 = atomicrmw or ptr %i, i64 10 seq_cst, align 8 + ret void +} + +; Function Attrs: mustprogress nofree norecurse nounwind willreturn memory(argmem: readwrite) +define dso_local i64 @test_fetch_or_64_ret(ptr nocapture noundef %i) local_unnamed_addr #0 { +; CHECK-LABEL: test_fetch_or_64_ret: +; CHECK: .Ltest_fetch_or_64_ret$local: +; CHECK-NEXT: .type .Ltest_fetch_or_64_ret$local,@function +; CHECK-NEXT: # %bb.0: # %entry +; CHECK-NEXT: r2 = 10 +; CHECK-NEXT: r3 = 10 +; CHECK-NEXT: r3 = atomic_fetch_or((u64 *)(r1 + 0), r3) +; CHECK-NEXT: r0 = 10 +; CHECK-NEXT: r0 = atomic_fetch_or((u64 *)(r1 + 0), r0) +; CHECK-NEXT: r0 += r3 +; CHECK-NEXT: r3 = 10 +; CHECK-NEXT: r3 = atomic_fetch_or((u64 *)(r1 + 0), r3) +; CHECK-NEXT: r0 += r3 +; CHECK-NEXT: r3 = 10 +; CHECK-NEXT: r3 = atomic_fetch_or((u64 *)(r1 + 0), r3) +; CHECK-NEXT: r0 += r3 +; CHECK-NEXT: r2 = atomic_fetch_or((u64 *)(r1 + 0), r2) +; CHECK-NEXT: r0 += r2 +; CHECK-NEXT: exit +entry: + %0 = atomicrmw or ptr %i, i64 10 monotonic, align 8 + %1 = atomicrmw or ptr %i, i64 10 acquire, align 8 + %add = add nsw i64 %1, %0 + %2 = atomicrmw or ptr %i, i64 10 release, align 8 + %add5 = add nsw i64 %add, %2 + %3 = atomicrmw or ptr %i, i64 10 acq_rel, align 8 + %add8 = add nsw i64 %add5, %3 + %4 = atomicrmw or ptr %i, i64 10 seq_cst, align 8 + %add11 = add nsw i64 %add8, %4 + ret i64 %add11 +} + +; Function Attrs: mustprogress nofree norecurse nounwind willreturn memory(argmem: readwrite) +define dso_local void @test_fetch_xor_64_noret(ptr nocapture noundef %i) local_unnamed_addr #0 { +; CHECK-LABEL: test_fetch_xor_64_noret: +; CHECK: .Ltest_fetch_xor_64_noret$local: +; CHECK-NEXT: .type .Ltest_fetch_xor_64_noret$local,@function +; CHECK-NEXT: # %bb.0: # %entry +; CHECK-NEXT: r2 = 10 +; CHECK-NEXT: r3 = 10 +; CHECK-NEXT: lock *(u64 *)(r1 + 0) ^= r3 +; CHECK-NEXT: r3 = 10 +; CHECK-NEXT: r3 = atomic_fetch_xor((u64 *)(r1 + 0), r3) +; CHECK-NEXT: r3 = 10 +; CHECK-NEXT: r3 = atomic_fetch_xor((u64 *)(r1 + 0), r3) +; CHECK-NEXT: r3 = 10 +; CHECK-NEXT: r3 = atomic_fetch_xor((u64 *)(r1 + 0), r3) +; CHECK-NEXT: r2 = atomic_fetch_xor((u64 *)(r1 + 0), r2) +; CHECK-NEXT: exit +entry: + %0 = atomicrmw xor ptr %i, i64 10 monotonic, align 8 + %1 = atomicrmw xor ptr %i, i64 10 acquire, align 8 + %2 = atomicrmw xor ptr %i, i64 10 release, align 8 + %3 = atomicrmw xor ptr %i, i64 10 acq_rel, align 8 + %4 = atomicrmw xor ptr %i, i64 10 seq_cst, align 8 + ret void +} + +; Function Attrs: mustprogress nofree norecurse nounwind willreturn memory(argmem: readwrite) +define dso_local i64 @test_fetch_xor_64_ret(ptr nocapture noundef %i) local_unnamed_addr #0 { +; CHECK-LABEL: test_fetch_xor_64_ret: +; CHECK: .Ltest_fetch_xor_64_ret$local: +; CHECK-NEXT: .type .Ltest_fetch_xor_64_ret$local,@function +; CHECK-NEXT: # %bb.0: # 
%entry +; CHECK-NEXT: r2 = 10 +; CHECK-NEXT: r3 = 10 +; CHECK-NEXT: r3 = atomic_fetch_xor((u64 *)(r1 + 0), r3) +; CHECK-NEXT: r0 = 10 +; CHECK-NEXT: r0 = atomic_fetch_xor((u64 *)(r1 + 0), r0) +; CHECK-NEXT: r0 += r3 +; CHECK-NEXT: r3 = 10 +; CHECK-NEXT: r3 = atomic_fetch_xor((u64 *)(r1 + 0), r3) +; CHECK-NEXT: r0 += r3 +; CHECK-NEXT: r3 = 10 +; CHECK-NEXT: r3 = atomic_fetch_xor((u64 *)(r1 + 0), r3) +; CHECK-NEXT: r0 += r3 +; CHECK-NEXT: r2 = atomic_fetch_xor((u64 *)(r1 + 0), r2) +; CHECK-NEXT: r0 += r2 +; CHECK-NEXT: exit +entry: + %0 = atomicrmw xor ptr %i, i64 10 monotonic, align 8 + %1 = atomicrmw xor ptr %i, i64 10 acquire, align 8 + %add = add nsw i64 %1, %0 + %2 = atomicrmw xor ptr %i, i64 10 release, align 8 + %add5 = add nsw i64 %add, %2 + %3 = atomicrmw xor ptr %i, i64 10 acq_rel, align 8 + %add8 = add nsw i64 %add5, %3 + %4 = atomicrmw xor ptr %i, i64 10 seq_cst, align 8 + %add11 = add nsw i64 %add8, %4 + ret i64 %add11 +} + +attributes #0 = { mustprogress nofree norecurse nounwind willreturn memory(argmem: readwrite) "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="v1" } + +!llvm.module.flags = !{!0, !1} +!llvm.ident = !{!2} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{i32 7, !"frame-pointer", i32 2} +!2 = !{!"clang version 20.0.0git (git@github.com:yonghong-song/llvm-project.git 6f71e34e194dab5a52cb2211af575c6067e9e504)"} diff --git a/llvm/test/CodeGen/BPF/atomics_mem_order_v3.ll b/llvm/test/CodeGen/BPF/atomics_mem_order_v3.ll new file mode 100644 index 0000000000000..20b9ebcb0d473 --- /dev/null +++ b/llvm/test/CodeGen/BPF/atomics_mem_order_v3.ll @@ -0,0 +1,781 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -march=bpfel -mcpu=v3 -filetype=asm < %s | FileCheck %s +; +; Source: +; $ cat atomics_mem_order_v3.c +; #include +; +; void test_fetch_add_32_noret(int _Atomic *i) { +; (void)__c11_atomic_fetch_add(i, 10, memory_order_relaxed); +; (void)__c11_atomic_fetch_add(i, 10, memory_order_acquire); +; (void)__c11_atomic_fetch_add(i, 10, memory_order_release); +; (void)__c11_atomic_fetch_add(i, 10, memory_order_acq_rel); +; (void)__c11_atomic_fetch_add(i, 10, memory_order_seq_cst); +; } +; +; int test_fetch_add_32_ret(int _Atomic *i) { +; return __c11_atomic_fetch_add(i, 10, memory_order_relaxed) + +; __c11_atomic_fetch_add(i, 10, memory_order_acquire) + +; __c11_atomic_fetch_add(i, 10, memory_order_release) + +; __c11_atomic_fetch_add(i, 10, memory_order_acq_rel) + +; __c11_atomic_fetch_add(i, 10, memory_order_seq_cst); +; } +; +; void test_fetch_add_64_noret(long _Atomic *i) { +; (void)__c11_atomic_fetch_add(i, 10, memory_order_relaxed); +; (void)__c11_atomic_fetch_add(i, 10, memory_order_acquire); +; (void)__c11_atomic_fetch_add(i, 10, memory_order_release); +; (void)__c11_atomic_fetch_add(i, 10, memory_order_acq_rel); +; (void)__c11_atomic_fetch_add(i, 10, memory_order_seq_cst); +; } +; +; long test_fetch_add_64_ret(long _Atomic *i) { +; return __c11_atomic_fetch_add(i, 10, memory_order_relaxed) + +; __c11_atomic_fetch_add(i, 10, memory_order_acquire) + +; __c11_atomic_fetch_add(i, 10, memory_order_release) + +; __c11_atomic_fetch_add(i, 10, memory_order_acq_rel) + +; __c11_atomic_fetch_add(i, 10, memory_order_seq_cst); +; } +; +; void test_fetch_sub_32_noret(int _Atomic *i) { +; (void)__c11_atomic_fetch_sub(i, 10, memory_order_relaxed); +; (void)__c11_atomic_fetch_sub(i, 10, memory_order_acquire); +; (void)__c11_atomic_fetch_sub(i, 10, memory_order_release); +; 
(void)__c11_atomic_fetch_sub(i, 10, memory_order_acq_rel); +; (void)__c11_atomic_fetch_sub(i, 10, memory_order_seq_cst); +; } +; +; int test_fetch_sub_32_ret(int _Atomic *i) { +; return __c11_atomic_fetch_sub(i, 10, memory_order_relaxed) + +; __c11_atomic_fetch_sub(i, 10, memory_order_acquire) + +; __c11_atomic_fetch_sub(i, 10, memory_order_release) + +; __c11_atomic_fetch_sub(i, 10, memory_order_acq_rel) + +; __c11_atomic_fetch_sub(i, 10, memory_order_seq_cst); +; } +; +; void test_fetch_sub_64_noret(long _Atomic *i) { +; (void)__c11_atomic_fetch_sub(i, 10, memory_order_relaxed); +; (void)__c11_atomic_fetch_sub(i, 10, memory_order_acquire); +; (void)__c11_atomic_fetch_sub(i, 10, memory_order_release); +; (void)__c11_atomic_fetch_sub(i, 10, memory_order_acq_rel); +; (void)__c11_atomic_fetch_sub(i, 10, memory_order_seq_cst); +; } +; +; long test_fetch_sub_64_ret(long _Atomic *i) { +; return __c11_atomic_fetch_sub(i, 10, memory_order_relaxed) + +; __c11_atomic_fetch_sub(i, 10, memory_order_acquire) + +; __c11_atomic_fetch_sub(i, 10, memory_order_release) + +; __c11_atomic_fetch_sub(i, 10, memory_order_acq_rel) + +; __c11_atomic_fetch_sub(i, 10, memory_order_seq_cst); +; } +; +; void test_fetch_and_32_noret(int _Atomic *i) { +; (void)__c11_atomic_fetch_and(i, 10, memory_order_relaxed); +; (void)__c11_atomic_fetch_and(i, 10, memory_order_acquire); +; (void)__c11_atomic_fetch_and(i, 10, memory_order_release); +; (void)__c11_atomic_fetch_and(i, 10, memory_order_acq_rel); +; (void)__c11_atomic_fetch_and(i, 10, memory_order_seq_cst); +; } +; +; int test_fetch_and_32_ret(int _Atomic *i) { +; return __c11_atomic_fetch_and(i, 10, memory_order_relaxed) + +; __c11_atomic_fetch_and(i, 10, memory_order_acquire) + +; __c11_atomic_fetch_and(i, 10, memory_order_release) + +; __c11_atomic_fetch_and(i, 10, memory_order_acq_rel) + +; __c11_atomic_fetch_and(i, 10, memory_order_seq_cst); +; } +; +; void test_fetch_and_64_noret(long _Atomic *i) { +; (void)__c11_atomic_fetch_and(i, 10, memory_order_relaxed); +; (void)__c11_atomic_fetch_and(i, 10, memory_order_acquire); +; (void)__c11_atomic_fetch_and(i, 10, memory_order_release); +; (void)__c11_atomic_fetch_and(i, 10, memory_order_acq_rel); +; (void)__c11_atomic_fetch_and(i, 10, memory_order_seq_cst); +; } +; +; long test_fetch_and_64_ret(long _Atomic *i) { +; return __c11_atomic_fetch_and(i, 10, memory_order_relaxed) + +; __c11_atomic_fetch_and(i, 10, memory_order_acquire) + +; __c11_atomic_fetch_and(i, 10, memory_order_release) + +; __c11_atomic_fetch_and(i, 10, memory_order_acq_rel) + +; __c11_atomic_fetch_and(i, 10, memory_order_seq_cst); +; } +; +; void test_fetch_or_32_noret(int _Atomic *i) { +; (void)__c11_atomic_fetch_or(i, 10, memory_order_relaxed); +; (void)__c11_atomic_fetch_or(i, 10, memory_order_acquire); +; (void)__c11_atomic_fetch_or(i, 10, memory_order_release); +; (void)__c11_atomic_fetch_or(i, 10, memory_order_acq_rel); +; (void)__c11_atomic_fetch_or(i, 10, memory_order_seq_cst); +; } +; +; int test_fetch_or_32_ret(int _Atomic *i) { +; return __c11_atomic_fetch_or(i, 10, memory_order_relaxed) + +; __c11_atomic_fetch_or(i, 10, memory_order_acquire) + +; __c11_atomic_fetch_or(i, 10, memory_order_release) + +; __c11_atomic_fetch_or(i, 10, memory_order_acq_rel) + +; __c11_atomic_fetch_or(i, 10, memory_order_seq_cst); +; } +; +; void test_fetch_or_64_noret(long _Atomic *i) { +; (void)__c11_atomic_fetch_or(i, 10, memory_order_relaxed); +; (void)__c11_atomic_fetch_or(i, 10, memory_order_acquire); +; (void)__c11_atomic_fetch_or(i, 10, 
memory_order_release); +; (void)__c11_atomic_fetch_or(i, 10, memory_order_acq_rel); +; (void)__c11_atomic_fetch_or(i, 10, memory_order_seq_cst); +; } +; +; long test_fetch_or_64_ret(long _Atomic *i) { +; return __c11_atomic_fetch_or(i, 10, memory_order_relaxed) + +; __c11_atomic_fetch_or(i, 10, memory_order_acquire) + +; __c11_atomic_fetch_or(i, 10, memory_order_release) + +; __c11_atomic_fetch_or(i, 10, memory_order_acq_rel) + +; __c11_atomic_fetch_or(i, 10, memory_order_seq_cst); +; } +; +; void test_fetch_xor_32_noret(int _Atomic *i) { +; (void)__c11_atomic_fetch_xor(i, 10, memory_order_relaxed); +; (void)__c11_atomic_fetch_xor(i, 10, memory_order_acquire); +; (void)__c11_atomic_fetch_xor(i, 10, memory_order_release); +; (void)__c11_atomic_fetch_xor(i, 10, memory_order_acq_rel); +; (void)__c11_atomic_fetch_xor(i, 10, memory_order_seq_cst); +; } +; +; int test_fetch_xor_32_ret(int _Atomic *i) { +; return __c11_atomic_fetch_xor(i, 10, memory_order_relaxed) + +; __c11_atomic_fetch_xor(i, 10, memory_order_acquire) + +; __c11_atomic_fetch_xor(i, 10, memory_order_release) + +; __c11_atomic_fetch_xor(i, 10, memory_order_acq_rel) + +; __c11_atomic_fetch_xor(i, 10, memory_order_seq_cst); +; } +; +; void test_fetch_xor_64_noret(long _Atomic *i) { +; (void)__c11_atomic_fetch_xor(i, 10, memory_order_relaxed); +; (void)__c11_atomic_fetch_xor(i, 10, memory_order_acquire); +; (void)__c11_atomic_fetch_xor(i, 10, memory_order_release); +; (void)__c11_atomic_fetch_xor(i, 10, memory_order_acq_rel); +; (void)__c11_atomic_fetch_xor(i, 10, memory_order_seq_cst); +; } +; +; long test_fetch_xor_64_ret(long _Atomic *i) { +; return __c11_atomic_fetch_xor(i, 10, memory_order_relaxed) + +; __c11_atomic_fetch_xor(i, 10, memory_order_acquire) + +; __c11_atomic_fetch_xor(i, 10, memory_order_release) + +; __c11_atomic_fetch_xor(i, 10, memory_order_acq_rel) + +; __c11_atomic_fetch_xor(i, 10, memory_order_seq_cst); +; } + +target triple = "bpf" + +; Function Attrs: mustprogress nofree norecurse nounwind willreturn memory(argmem: readwrite) +define dso_local void @test_fetch_add_32_noret(ptr nocapture noundef %i) local_unnamed_addr #0 { +; CHECK-LABEL: test_fetch_add_32_noret: +; CHECK: .Ltest_fetch_add_32_noret$local: +; CHECK-NEXT: .type .Ltest_fetch_add_32_noret$local,@function +; CHECK-NEXT: # %bb.0: # %entry +; CHECK-NEXT: w2 = 10 +; CHECK-NEXT: w3 = 10 +; CHECK-NEXT: lock *(u32 *)(r1 + 0) += w3 +; CHECK-NEXT: w3 = 10 +; CHECK-NEXT: w3 = atomic_fetch_add((u32 *)(r1 + 0), w3) +; CHECK-NEXT: w3 = 10 +; CHECK-NEXT: w3 = atomic_fetch_add((u32 *)(r1 + 0), w3) +; CHECK-NEXT: w3 = 10 +; CHECK-NEXT: w3 = atomic_fetch_add((u32 *)(r1 + 0), w3) +; CHECK-NEXT: w2 = atomic_fetch_add((u32 *)(r1 + 0), w2) +; CHECK-NEXT: exit +entry: + %0 = atomicrmw add ptr %i, i32 10 monotonic, align 4 + %1 = atomicrmw add ptr %i, i32 10 acquire, align 4 + %2 = atomicrmw add ptr %i, i32 10 release, align 4 + %3 = atomicrmw add ptr %i, i32 10 acq_rel, align 4 + %4 = atomicrmw add ptr %i, i32 10 seq_cst, align 4 + ret void +} + +; Function Attrs: mustprogress nofree norecurse nounwind willreturn memory(argmem: readwrite) +define dso_local i32 @test_fetch_add_32_ret(ptr nocapture noundef %i) local_unnamed_addr #0 { +; CHECK-LABEL: test_fetch_add_32_ret: +; CHECK: .Ltest_fetch_add_32_ret$local: +; CHECK-NEXT: .type .Ltest_fetch_add_32_ret$local,@function +; CHECK-NEXT: # %bb.0: # %entry +; CHECK-NEXT: w2 = 10 +; CHECK-NEXT: w3 = 10 +; CHECK-NEXT: lock *(u32 *)(r1 + 0) += w3 +; CHECK-NEXT: w0 = 10 +; CHECK-NEXT: w0 = atomic_fetch_add((u32 *)(r1 + 0), 
w0) +; CHECK-NEXT: w0 += w3 +; CHECK-NEXT: w3 = 10 +; CHECK-NEXT: w3 = atomic_fetch_add((u32 *)(r1 + 0), w3) +; CHECK-NEXT: w0 += w3 +; CHECK-NEXT: w3 = 10 +; CHECK-NEXT: w3 = atomic_fetch_add((u32 *)(r1 + 0), w3) +; CHECK-NEXT: w0 += w3 +; CHECK-NEXT: w2 = atomic_fetch_add((u32 *)(r1 + 0), w2) +; CHECK-NEXT: w0 += w2 +; CHECK-NEXT: exit +entry: + %0 = atomicrmw add ptr %i, i32 10 monotonic, align 4 + %1 = atomicrmw add ptr %i, i32 10 acquire, align 4 + %add = add nsw i32 %1, %0 + %2 = atomicrmw add ptr %i, i32 10 release, align 4 + %add5 = add nsw i32 %add, %2 + %3 = atomicrmw add ptr %i, i32 10 acq_rel, align 4 + %add8 = add nsw i32 %add5, %3 + %4 = atomicrmw add ptr %i, i32 10 seq_cst, align 4 + %add11 = add nsw i32 %add8, %4 + ret i32 %add11 +} + +; Function Attrs: mustprogress nofree norecurse nounwind willreturn memory(argmem: readwrite) +define dso_local void @test_fetch_add_64_noret(ptr nocapture noundef %i) local_unnamed_addr #0 { +; CHECK-LABEL: test_fetch_add_64_noret: +; CHECK: .Ltest_fetch_add_64_noret$local: +; CHECK-NEXT: .type .Ltest_fetch_add_64_noret$local,@function +; CHECK-NEXT: # %bb.0: # %entry +; CHECK-NEXT: r2 = 10 +; CHECK-NEXT: r3 = 10 +; CHECK-NEXT: lock *(u64 *)(r1 + 0) += r3 +; CHECK-NEXT: r3 = 10 +; CHECK-NEXT: r3 = atomic_fetch_add((u64 *)(r1 + 0), r3) +; CHECK-NEXT: r3 = 10 +; CHECK-NEXT: r3 = atomic_fetch_add((u64 *)(r1 + 0), r3) +; CHECK-NEXT: r3 = 10 +; CHECK-NEXT: r3 = atomic_fetch_add((u64 *)(r1 + 0), r3) +; CHECK-NEXT: r2 = atomic_fetch_add((u64 *)(r1 + 0), r2) +; CHECK-NEXT: exit +entry: + %0 = atomicrmw add ptr %i, i64 10 monotonic, align 8 + %1 = atomicrmw add ptr %i, i64 10 acquire, align 8 + %2 = atomicrmw add ptr %i, i64 10 release, align 8 + %3 = atomicrmw add ptr %i, i64 10 acq_rel, align 8 + %4 = atomicrmw add ptr %i, i64 10 seq_cst, align 8 + ret void +} + +; Function Attrs: mustprogress nofree norecurse nounwind willreturn memory(argmem: readwrite) +define dso_local i64 @test_fetch_add_64_ret(ptr nocapture noundef %i) local_unnamed_addr #0 { +; CHECK-LABEL: test_fetch_add_64_ret: +; CHECK: .Ltest_fetch_add_64_ret$local: +; CHECK-NEXT: .type .Ltest_fetch_add_64_ret$local,@function +; CHECK-NEXT: # %bb.0: # %entry +; CHECK-NEXT: r2 = 10 +; CHECK-NEXT: r3 = 10 +; CHECK-NEXT: lock *(u64 *)(r1 + 0) += r3 +; CHECK-NEXT: r0 = 10 +; CHECK-NEXT: r0 = atomic_fetch_add((u64 *)(r1 + 0), r0) +; CHECK-NEXT: r0 += r3 +; CHECK-NEXT: r3 = 10 +; CHECK-NEXT: r3 = atomic_fetch_add((u64 *)(r1 + 0), r3) +; CHECK-NEXT: r0 += r3 +; CHECK-NEXT: r3 = 10 +; CHECK-NEXT: r3 = atomic_fetch_add((u64 *)(r1 + 0), r3) +; CHECK-NEXT: r0 += r3 +; CHECK-NEXT: r2 = atomic_fetch_add((u64 *)(r1 + 0), r2) +; CHECK-NEXT: r0 += r2 +; CHECK-NEXT: exit +entry: + %0 = atomicrmw add ptr %i, i64 10 monotonic, align 8 + %1 = atomicrmw add ptr %i, i64 10 acquire, align 8 + %add = add nsw i64 %1, %0 + %2 = atomicrmw add ptr %i, i64 10 release, align 8 + %add5 = add nsw i64 %add, %2 + %3 = atomicrmw add ptr %i, i64 10 acq_rel, align 8 + %add8 = add nsw i64 %add5, %3 + %4 = atomicrmw add ptr %i, i64 10 seq_cst, align 8 + %add11 = add nsw i64 %add8, %4 + ret i64 %add11 +} + +; Function Attrs: mustprogress nofree norecurse nounwind willreturn memory(argmem: readwrite) +define dso_local void @test_fetch_sub_32_noret(ptr nocapture noundef %i) local_unnamed_addr #0 { +; CHECK-LABEL: test_fetch_sub_32_noret: +; CHECK: .Ltest_fetch_sub_32_noret$local: +; CHECK-NEXT: .type .Ltest_fetch_sub_32_noret$local,@function +; CHECK-NEXT: # %bb.0: # %entry +; CHECK-NEXT: w2 = 10 +; CHECK-NEXT: w2 = -w2 +; 
CHECK-NEXT: w3 = w2 +; CHECK-NEXT: lock *(u32 *)(r1 + 0) += w3 +; CHECK-NEXT: w3 = w2 +; CHECK-NEXT: w3 = atomic_fetch_add((u32 *)(r1 + 0), w3) +; CHECK-NEXT: w3 = w2 +; CHECK-NEXT: w3 = atomic_fetch_add((u32 *)(r1 + 0), w3) +; CHECK-NEXT: w3 = w2 +; CHECK-NEXT: w3 = atomic_fetch_add((u32 *)(r1 + 0), w3) +; CHECK-NEXT: w2 = atomic_fetch_add((u32 *)(r1 + 0), w2) +; CHECK-NEXT: exit +entry: + %0 = atomicrmw sub ptr %i, i32 10 monotonic, align 4 + %1 = atomicrmw sub ptr %i, i32 10 acquire, align 4 + %2 = atomicrmw sub ptr %i, i32 10 release, align 4 + %3 = atomicrmw sub ptr %i, i32 10 acq_rel, align 4 + %4 = atomicrmw sub ptr %i, i32 10 seq_cst, align 4 + ret void +} + +; Function Attrs: mustprogress nofree norecurse nounwind willreturn memory(argmem: readwrite) +define dso_local i32 @test_fetch_sub_32_ret(ptr nocapture noundef %i) local_unnamed_addr #0 { +; CHECK-LABEL: test_fetch_sub_32_ret: +; CHECK: .Ltest_fetch_sub_32_ret$local: +; CHECK-NEXT: .type .Ltest_fetch_sub_32_ret$local,@function +; CHECK-NEXT: # %bb.0: # %entry +; CHECK-NEXT: w2 = 10 +; CHECK-NEXT: w2 = -w2 +; CHECK-NEXT: w3 = w2 +; CHECK-NEXT: lock *(u32 *)(r1 + 0) += w3 +; CHECK-NEXT: w0 = w2 +; CHECK-NEXT: w0 = atomic_fetch_add((u32 *)(r1 + 0), w0) +; CHECK-NEXT: w0 += w3 +; CHECK-NEXT: w3 = w2 +; CHECK-NEXT: w3 = atomic_fetch_add((u32 *)(r1 + 0), w3) +; CHECK-NEXT: w0 += w3 +; CHECK-NEXT: w3 = w2 +; CHECK-NEXT: w3 = atomic_fetch_add((u32 *)(r1 + 0), w3) +; CHECK-NEXT: w0 += w3 +; CHECK-NEXT: w2 = atomic_fetch_add((u32 *)(r1 + 0), w2) +; CHECK-NEXT: w0 += w2 +; CHECK-NEXT: exit +entry: + %0 = atomicrmw sub ptr %i, i32 10 monotonic, align 4 + %1 = atomicrmw sub ptr %i, i32 10 acquire, align 4 + %add = add nsw i32 %1, %0 + %2 = atomicrmw sub ptr %i, i32 10 release, align 4 + %add5 = add nsw i32 %add, %2 + %3 = atomicrmw sub ptr %i, i32 10 acq_rel, align 4 + %add8 = add nsw i32 %add5, %3 + %4 = atomicrmw sub ptr %i, i32 10 seq_cst, align 4 + %add11 = add nsw i32 %add8, %4 + ret i32 %add11 +} + +; Function Attrs: mustprogress nofree norecurse nounwind willreturn memory(argmem: readwrite) +define dso_local void @test_fetch_sub_64_noret(ptr nocapture noundef %i) local_unnamed_addr #0 { +; CHECK-LABEL: test_fetch_sub_64_noret: +; CHECK: .Ltest_fetch_sub_64_noret$local: +; CHECK-NEXT: .type .Ltest_fetch_sub_64_noret$local,@function +; CHECK-NEXT: # %bb.0: # %entry +; CHECK-NEXT: r2 = 10 +; CHECK-NEXT: r2 = -r2 +; CHECK-NEXT: r3 = r2 +; CHECK-NEXT: lock *(u64 *)(r1 + 0) += r3 +; CHECK-NEXT: r3 = r2 +; CHECK-NEXT: r3 = atomic_fetch_add((u64 *)(r1 + 0), r3) +; CHECK-NEXT: r3 = r2 +; CHECK-NEXT: r3 = atomic_fetch_add((u64 *)(r1 + 0), r3) +; CHECK-NEXT: r3 = r2 +; CHECK-NEXT: r3 = atomic_fetch_add((u64 *)(r1 + 0), r3) +; CHECK-NEXT: r2 = atomic_fetch_add((u64 *)(r1 + 0), r2) +; CHECK-NEXT: exit +entry: + %0 = atomicrmw sub ptr %i, i64 10 monotonic, align 8 + %1 = atomicrmw sub ptr %i, i64 10 acquire, align 8 + %2 = atomicrmw sub ptr %i, i64 10 release, align 8 + %3 = atomicrmw sub ptr %i, i64 10 acq_rel, align 8 + %4 = atomicrmw sub ptr %i, i64 10 seq_cst, align 8 + ret void +} + +; Function Attrs: mustprogress nofree norecurse nounwind willreturn memory(argmem: readwrite) +define dso_local i64 @test_fetch_sub_64_ret(ptr nocapture noundef %i) local_unnamed_addr #0 { +; CHECK-LABEL: test_fetch_sub_64_ret: +; CHECK: .Ltest_fetch_sub_64_ret$local: +; CHECK-NEXT: .type .Ltest_fetch_sub_64_ret$local,@function +; CHECK-NEXT: # %bb.0: # %entry +; CHECK-NEXT: r2 = 10 +; CHECK-NEXT: r2 = -r2 +; CHECK-NEXT: r3 = r2 +; CHECK-NEXT: lock *(u64 *)(r1 
+ 0) += r3 +; CHECK-NEXT: r0 = r2 +; CHECK-NEXT: r0 = atomic_fetch_add((u64 *)(r1 + 0), r0) +; CHECK-NEXT: r0 += r3 +; CHECK-NEXT: r3 = r2 +; CHECK-NEXT: r3 = atomic_fetch_add((u64 *)(r1 + 0), r3) +; CHECK-NEXT: r0 += r3 +; CHECK-NEXT: r3 = r2 +; CHECK-NEXT: r3 = atomic_fetch_add((u64 *)(r1 + 0), r3) +; CHECK-NEXT: r0 += r3 +; CHECK-NEXT: r2 = atomic_fetch_add((u64 *)(r1 + 0), r2) +; CHECK-NEXT: r0 += r2 +; CHECK-NEXT: exit +entry: + %0 = atomicrmw sub ptr %i, i64 10 monotonic, align 8 + %1 = atomicrmw sub ptr %i, i64 10 acquire, align 8 + %add = add nsw i64 %1, %0 + %2 = atomicrmw sub ptr %i, i64 10 release, align 8 + %add5 = add nsw i64 %add, %2 + %3 = atomicrmw sub ptr %i, i64 10 acq_rel, align 8 + %add8 = add nsw i64 %add5, %3 + %4 = atomicrmw sub ptr %i, i64 10 seq_cst, align 8 + %add11 = add nsw i64 %add8, %4 + ret i64 %add11 +} + +; Function Attrs: mustprogress nofree norecurse nounwind willreturn memory(argmem: readwrite) +define dso_local void @test_fetch_and_32_noret(ptr nocapture noundef %i) local_unnamed_addr #0 { +; CHECK-LABEL: test_fetch_and_32_noret: +; CHECK: .Ltest_fetch_and_32_noret$local: +; CHECK-NEXT: .type .Ltest_fetch_and_32_noret$local,@function +; CHECK-NEXT: # %bb.0: # %entry +; CHECK-NEXT: w2 = 10 +; CHECK-NEXT: w3 = 10 +; CHECK-NEXT: lock *(u32 *)(r1 + 0) &= w3 +; CHECK-NEXT: w3 = 10 +; CHECK-NEXT: w3 = atomic_fetch_and((u32 *)(r1 + 0), w3) +; CHECK-NEXT: w3 = 10 +; CHECK-NEXT: w3 = atomic_fetch_and((u32 *)(r1 + 0), w3) +; CHECK-NEXT: w3 = 10 +; CHECK-NEXT: w3 = atomic_fetch_and((u32 *)(r1 + 0), w3) +; CHECK-NEXT: w2 = atomic_fetch_and((u32 *)(r1 + 0), w2) +; CHECK-NEXT: exit +entry: + %0 = atomicrmw and ptr %i, i32 10 monotonic, align 4 + %1 = atomicrmw and ptr %i, i32 10 acquire, align 4 + %2 = atomicrmw and ptr %i, i32 10 release, align 4 + %3 = atomicrmw and ptr %i, i32 10 acq_rel, align 4 + %4 = atomicrmw and ptr %i, i32 10 seq_cst, align 4 + ret void +} + +; Function Attrs: mustprogress nofree norecurse nounwind willreturn memory(argmem: readwrite) +define dso_local i32 @test_fetch_and_32_ret(ptr nocapture noundef %i) local_unnamed_addr #0 { +; CHECK-LABEL: test_fetch_and_32_ret: +; CHECK: .Ltest_fetch_and_32_ret$local: +; CHECK-NEXT: .type .Ltest_fetch_and_32_ret$local,@function +; CHECK-NEXT: # %bb.0: # %entry +; CHECK-NEXT: w2 = 10 +; CHECK-NEXT: w3 = 10 +; CHECK-NEXT: lock *(u32 *)(r1 + 0) &= w3 +; CHECK-NEXT: w0 = 10 +; CHECK-NEXT: w0 = atomic_fetch_and((u32 *)(r1 + 0), w0) +; CHECK-NEXT: w0 += w3 +; CHECK-NEXT: w3 = 10 +; CHECK-NEXT: w3 = atomic_fetch_and((u32 *)(r1 + 0), w3) +; CHECK-NEXT: w0 += w3 +; CHECK-NEXT: w3 = 10 +; CHECK-NEXT: w3 = atomic_fetch_and((u32 *)(r1 + 0), w3) +; CHECK-NEXT: w0 += w3 +; CHECK-NEXT: w2 = atomic_fetch_and((u32 *)(r1 + 0), w2) +; CHECK-NEXT: w0 += w2 +; CHECK-NEXT: exit +entry: + %0 = atomicrmw and ptr %i, i32 10 monotonic, align 4 + %1 = atomicrmw and ptr %i, i32 10 acquire, align 4 + %add = add nsw i32 %1, %0 + %2 = atomicrmw and ptr %i, i32 10 release, align 4 + %add5 = add nsw i32 %add, %2 + %3 = atomicrmw and ptr %i, i32 10 acq_rel, align 4 + %add8 = add nsw i32 %add5, %3 + %4 = atomicrmw and ptr %i, i32 10 seq_cst, align 4 + %add11 = add nsw i32 %add8, %4 + ret i32 %add11 +} + +; Function Attrs: mustprogress nofree norecurse nounwind willreturn memory(argmem: readwrite) +define dso_local void @test_fetch_and_64_noret(ptr nocapture noundef %i) local_unnamed_addr #0 { +; CHECK-LABEL: test_fetch_and_64_noret: +; CHECK: .Ltest_fetch_and_64_noret$local: +; CHECK-NEXT: .type .Ltest_fetch_and_64_noret$local,@function 
+; CHECK-NEXT: # %bb.0: # %entry +; CHECK-NEXT: r2 = 10 +; CHECK-NEXT: r3 = 10 +; CHECK-NEXT: lock *(u64 *)(r1 + 0) &= r3 +; CHECK-NEXT: r3 = 10 +; CHECK-NEXT: r3 = atomic_fetch_and((u64 *)(r1 + 0), r3) +; CHECK-NEXT: r3 = 10 +; CHECK-NEXT: r3 = atomic_fetch_and((u64 *)(r1 + 0), r3) +; CHECK-NEXT: r3 = 10 +; CHECK-NEXT: r3 = atomic_fetch_and((u64 *)(r1 + 0), r3) +; CHECK-NEXT: r2 = atomic_fetch_and((u64 *)(r1 + 0), r2) +; CHECK-NEXT: exit +entry: + %0 = atomicrmw and ptr %i, i64 10 monotonic, align 8 + %1 = atomicrmw and ptr %i, i64 10 acquire, align 8 + %2 = atomicrmw and ptr %i, i64 10 release, align 8 + %3 = atomicrmw and ptr %i, i64 10 acq_rel, align 8 + %4 = atomicrmw and ptr %i, i64 10 seq_cst, align 8 + ret void +} + +; Function Attrs: mustprogress nofree norecurse nounwind willreturn memory(argmem: readwrite) +define dso_local i64 @test_fetch_and_64_ret(ptr nocapture noundef %i) local_unnamed_addr #0 { +; CHECK-LABEL: test_fetch_and_64_ret: +; CHECK: .Ltest_fetch_and_64_ret$local: +; CHECK-NEXT: .type .Ltest_fetch_and_64_ret$local,@function +; CHECK-NEXT: # %bb.0: # %entry +; CHECK-NEXT: r2 = 10 +; CHECK-NEXT: r3 = 10 +; CHECK-NEXT: r3 = atomic_fetch_and((u64 *)(r1 + 0), r3) +; CHECK-NEXT: r0 = 10 +; CHECK-NEXT: r0 = atomic_fetch_and((u64 *)(r1 + 0), r0) +; CHECK-NEXT: r0 += r3 +; CHECK-NEXT: r3 = 10 +; CHECK-NEXT: r3 = atomic_fetch_and((u64 *)(r1 + 0), r3) +; CHECK-NEXT: r0 += r3 +; CHECK-NEXT: r3 = 10 +; CHECK-NEXT: r3 = atomic_fetch_and((u64 *)(r1 + 0), r3) +; CHECK-NEXT: r0 += r3 +; CHECK-NEXT: r2 = atomic_fetch_and((u64 *)(r1 + 0), r2) +; CHECK-NEXT: r0 += r2 +; CHECK-NEXT: exit +entry: + %0 = atomicrmw and ptr %i, i64 10 monotonic, align 8 + %1 = atomicrmw and ptr %i, i64 10 acquire, align 8 + %add = add nsw i64 %1, %0 + %2 = atomicrmw and ptr %i, i64 10 release, align 8 + %add5 = add nsw i64 %add, %2 + %3 = atomicrmw and ptr %i, i64 10 acq_rel, align 8 + %add8 = add nsw i64 %add5, %3 + %4 = atomicrmw and ptr %i, i64 10 seq_cst, align 8 + %add11 = add nsw i64 %add8, %4 + ret i64 %add11 +} + +; Function Attrs: mustprogress nofree norecurse nounwind willreturn memory(argmem: readwrite) +define dso_local void @test_fetch_or_32_noret(ptr nocapture noundef %i) local_unnamed_addr #0 { +; CHECK-LABEL: test_fetch_or_32_noret: +; CHECK: .Ltest_fetch_or_32_noret$local: +; CHECK-NEXT: .type .Ltest_fetch_or_32_noret$local,@function +; CHECK-NEXT: # %bb.0: # %entry +; CHECK-NEXT: w2 = 10 +; CHECK-NEXT: w3 = 10 +; CHECK-NEXT: lock *(u32 *)(r1 + 0) |= w3 +; CHECK-NEXT: w3 = 10 +; CHECK-NEXT: w3 = atomic_fetch_or((u32 *)(r1 + 0), w3) +; CHECK-NEXT: w3 = 10 +; CHECK-NEXT: w3 = atomic_fetch_or((u32 *)(r1 + 0), w3) +; CHECK-NEXT: w3 = 10 +; CHECK-NEXT: w3 = atomic_fetch_or((u32 *)(r1 + 0), w3) +; CHECK-NEXT: w2 = atomic_fetch_or((u32 *)(r1 + 0), w2) +; CHECK-NEXT: exit +entry: + %0 = atomicrmw or ptr %i, i32 10 monotonic, align 4 + %1 = atomicrmw or ptr %i, i32 10 acquire, align 4 + %2 = atomicrmw or ptr %i, i32 10 release, align 4 + %3 = atomicrmw or ptr %i, i32 10 acq_rel, align 4 + %4 = atomicrmw or ptr %i, i32 10 seq_cst, align 4 + ret void +} + +; Function Attrs: mustprogress nofree norecurse nounwind willreturn memory(argmem: readwrite) +define dso_local i32 @test_fetch_or_32_ret(ptr nocapture noundef %i) local_unnamed_addr #0 { +; CHECK-LABEL: test_fetch_or_32_ret: +; CHECK: .Ltest_fetch_or_32_ret$local: +; CHECK-NEXT: .type .Ltest_fetch_or_32_ret$local,@function +; CHECK-NEXT: # %bb.0: # %entry +; CHECK-NEXT: w2 = 10 +; CHECK-NEXT: w3 = 10 +; CHECK-NEXT: lock *(u32 *)(r1 + 0) |= w3 +; 
CHECK-NEXT: w0 = 10 +; CHECK-NEXT: w0 = atomic_fetch_or((u32 *)(r1 + 0), w0) +; CHECK-NEXT: w0 += w3 +; CHECK-NEXT: w3 = 10 +; CHECK-NEXT: w3 = atomic_fetch_or((u32 *)(r1 + 0), w3) +; CHECK-NEXT: w0 += w3 +; CHECK-NEXT: w3 = 10 +; CHECK-NEXT: w3 = atomic_fetch_or((u32 *)(r1 + 0), w3) +; CHECK-NEXT: w0 += w3 +; CHECK-NEXT: w2 = atomic_fetch_or((u32 *)(r1 + 0), w2) +; CHECK-NEXT: w0 += w2 +; CHECK-NEXT: exit +entry: + %0 = atomicrmw or ptr %i, i32 10 monotonic, align 4 + %1 = atomicrmw or ptr %i, i32 10 acquire, align 4 + %add = add nsw i32 %1, %0 + %2 = atomicrmw or ptr %i, i32 10 release, align 4 + %add5 = add nsw i32 %add, %2 + %3 = atomicrmw or ptr %i, i32 10 acq_rel, align 4 + %add8 = add nsw i32 %add5, %3 + %4 = atomicrmw or ptr %i, i32 10 seq_cst, align 4 + %add11 = add nsw i32 %add8, %4 + ret i32 %add11 +} + +; Function Attrs: mustprogress nofree norecurse nounwind willreturn memory(argmem: readwrite) +define dso_local void @test_fetch_or_64_noret(ptr nocapture noundef %i) local_unnamed_addr #0 { +; CHECK-LABEL: test_fetch_or_64_noret: +; CHECK: .Ltest_fetch_or_64_noret$local: +; CHECK-NEXT: .type .Ltest_fetch_or_64_noret$local,@function +; CHECK-NEXT: # %bb.0: # %entry +; CHECK-NEXT: r2 = 10 +; CHECK-NEXT: r3 = 10 +; CHECK-NEXT: lock *(u64 *)(r1 + 0) |= r3 +; CHECK-NEXT: r3 = 10 +; CHECK-NEXT: r3 = atomic_fetch_or((u64 *)(r1 + 0), r3) +; CHECK-NEXT: r3 = 10 +; CHECK-NEXT: r3 = atomic_fetch_or((u64 *)(r1 + 0), r3) +; CHECK-NEXT: r3 = 10 +; CHECK-NEXT: r3 = atomic_fetch_or((u64 *)(r1 + 0), r3) +; CHECK-NEXT: r2 = atomic_fetch_or((u64 *)(r1 + 0), r2) +; CHECK-NEXT: exit +entry: + %0 = atomicrmw or ptr %i, i64 10 monotonic, align 8 + %1 = atomicrmw or ptr %i, i64 10 acquire, align 8 + %2 = atomicrmw or ptr %i, i64 10 release, align 8 + %3 = atomicrmw or ptr %i, i64 10 acq_rel, align 8 + %4 = atomicrmw or ptr %i, i64 10 seq_cst, align 8 + ret void +} + +; Function Attrs: mustprogress nofree norecurse nounwind willreturn memory(argmem: readwrite) +define dso_local i64 @test_fetch_or_64_ret(ptr nocapture noundef %i) local_unnamed_addr #0 { +; CHECK-LABEL: test_fetch_or_64_ret: +; CHECK: .Ltest_fetch_or_64_ret$local: +; CHECK-NEXT: .type .Ltest_fetch_or_64_ret$local,@function +; CHECK-NEXT: # %bb.0: # %entry +; CHECK-NEXT: r2 = 10 +; CHECK-NEXT: r3 = 10 +; CHECK-NEXT: r3 = atomic_fetch_or((u64 *)(r1 + 0), r3) +; CHECK-NEXT: r0 = 10 +; CHECK-NEXT: r0 = atomic_fetch_or((u64 *)(r1 + 0), r0) +; CHECK-NEXT: r0 += r3 +; CHECK-NEXT: r3 = 10 +; CHECK-NEXT: r3 = atomic_fetch_or((u64 *)(r1 + 0), r3) +; CHECK-NEXT: r0 += r3 +; CHECK-NEXT: r3 = 10 +; CHECK-NEXT: r3 = atomic_fetch_or((u64 *)(r1 + 0), r3) +; CHECK-NEXT: r0 += r3 +; CHECK-NEXT: r2 = atomic_fetch_or((u64 *)(r1 + 0), r2) +; CHECK-NEXT: r0 += r2 +; CHECK-NEXT: exit +entry: + %0 = atomicrmw or ptr %i, i64 10 monotonic, align 8 + %1 = atomicrmw or ptr %i, i64 10 acquire, align 8 + %add = add nsw i64 %1, %0 + %2 = atomicrmw or ptr %i, i64 10 release, align 8 + %add5 = add nsw i64 %add, %2 + %3 = atomicrmw or ptr %i, i64 10 acq_rel, align 8 + %add8 = add nsw i64 %add5, %3 + %4 = atomicrmw or ptr %i, i64 10 seq_cst, align 8 + %add11 = add nsw i64 %add8, %4 + ret i64 %add11 +} + +; Function Attrs: mustprogress nofree norecurse nounwind willreturn memory(argmem: readwrite) +define dso_local void @test_fetch_xor_32_noret(ptr nocapture noundef %i) local_unnamed_addr #0 { +; CHECK-LABEL: test_fetch_xor_32_noret: +; CHECK: .Ltest_fetch_xor_32_noret$local: +; CHECK-NEXT: .type .Ltest_fetch_xor_32_noret$local,@function +; CHECK-NEXT: # %bb.0: # %entry +; 
CHECK-NEXT: w2 = 10 +; CHECK-NEXT: w3 = 10 +; CHECK-NEXT: lock *(u32 *)(r1 + 0) ^= w3 +; CHECK-NEXT: w3 = 10 +; CHECK-NEXT: w3 = atomic_fetch_xor((u32 *)(r1 + 0), w3) +; CHECK-NEXT: w3 = 10 +; CHECK-NEXT: w3 = atomic_fetch_xor((u32 *)(r1 + 0), w3) +; CHECK-NEXT: w3 = 10 +; CHECK-NEXT: w3 = atomic_fetch_xor((u32 *)(r1 + 0), w3) +; CHECK-NEXT: w2 = atomic_fetch_xor((u32 *)(r1 + 0), w2) +; CHECK-NEXT: exit +entry: + %0 = atomicrmw xor ptr %i, i32 10 monotonic, align 4 + %1 = atomicrmw xor ptr %i, i32 10 acquire, align 4 + %2 = atomicrmw xor ptr %i, i32 10 release, align 4 + %3 = atomicrmw xor ptr %i, i32 10 acq_rel, align 4 + %4 = atomicrmw xor ptr %i, i32 10 seq_cst, align 4 + ret void +} + +; Function Attrs: mustprogress nofree norecurse nounwind willreturn memory(argmem: readwrite) +define dso_local i32 @test_fetch_xor_32_ret(ptr nocapture noundef %i) local_unnamed_addr #0 { +; CHECK-LABEL: test_fetch_xor_32_ret: +; CHECK: .Ltest_fetch_xor_32_ret$local: +; CHECK-NEXT: .type .Ltest_fetch_xor_32_ret$local,@function +; CHECK-NEXT: # %bb.0: # %entry +; CHECK-NEXT: w2 = 10 +; CHECK-NEXT: w3 = 10 +; CHECK-NEXT: lock *(u32 *)(r1 + 0) ^= w3 +; CHECK-NEXT: w0 = 10 +; CHECK-NEXT: w0 = atomic_fetch_xor((u32 *)(r1 + 0), w0) +; CHECK-NEXT: w0 += w3 +; CHECK-NEXT: w3 = 10 +; CHECK-NEXT: w3 = atomic_fetch_xor((u32 *)(r1 + 0), w3) +; CHECK-NEXT: w0 += w3 +; CHECK-NEXT: w3 = 10 +; CHECK-NEXT: w3 = atomic_fetch_xor((u32 *)(r1 + 0), w3) +; CHECK-NEXT: w0 += w3 +; CHECK-NEXT: w2 = atomic_fetch_xor((u32 *)(r1 + 0), w2) +; CHECK-NEXT: w0 += w2 +; CHECK-NEXT: exit +entry: + %0 = atomicrmw xor ptr %i, i32 10 monotonic, align 4 + %1 = atomicrmw xor ptr %i, i32 10 acquire, align 4 + %add = add nsw i32 %1, %0 + %2 = atomicrmw xor ptr %i, i32 10 release, align 4 + %add5 = add nsw i32 %add, %2 + %3 = atomicrmw xor ptr %i, i32 10 acq_rel, align 4 + %add8 = add nsw i32 %add5, %3 + %4 = atomicrmw xor ptr %i, i32 10 seq_cst, align 4 + %add11 = add nsw i32 %add8, %4 + ret i32 %add11 +} + +; Function Attrs: mustprogress nofree norecurse nounwind willreturn memory(argmem: readwrite) +define dso_local void @test_fetch_xor_64_noret(ptr nocapture noundef %i) local_unnamed_addr #0 { +; CHECK-LABEL: test_fetch_xor_64_noret: +; CHECK: .Ltest_fetch_xor_64_noret$local: +; CHECK-NEXT: .type .Ltest_fetch_xor_64_noret$local,@function +; CHECK-NEXT: # %bb.0: # %entry +; CHECK-NEXT: r2 = 10 +; CHECK-NEXT: r3 = 10 +; CHECK-NEXT: lock *(u64 *)(r1 + 0) ^= r3 +; CHECK-NEXT: r3 = 10 +; CHECK-NEXT: r3 = atomic_fetch_xor((u64 *)(r1 + 0), r3) +; CHECK-NEXT: r3 = 10 +; CHECK-NEXT: r3 = atomic_fetch_xor((u64 *)(r1 + 0), r3) +; CHECK-NEXT: r3 = 10 +; CHECK-NEXT: r3 = atomic_fetch_xor((u64 *)(r1 + 0), r3) +; CHECK-NEXT: r2 = atomic_fetch_xor((u64 *)(r1 + 0), r2) +; CHECK-NEXT: exit +entry: + %0 = atomicrmw xor ptr %i, i64 10 monotonic, align 8 + %1 = atomicrmw xor ptr %i, i64 10 acquire, align 8 + %2 = atomicrmw xor ptr %i, i64 10 release, align 8 + %3 = atomicrmw xor ptr %i, i64 10 acq_rel, align 8 + %4 = atomicrmw xor ptr %i, i64 10 seq_cst, align 8 + ret void +} + +; Function Attrs: mustprogress nofree norecurse nounwind willreturn memory(argmem: readwrite) +define dso_local i64 @test_fetch_xor_64_ret(ptr nocapture noundef %i) local_unnamed_addr #0 { +; CHECK-LABEL: test_fetch_xor_64_ret: +; CHECK: .Ltest_fetch_xor_64_ret$local: +; CHECK-NEXT: .type .Ltest_fetch_xor_64_ret$local,@function +; CHECK-NEXT: # %bb.0: # %entry +; CHECK-NEXT: r2 = 10 +; CHECK-NEXT: r3 = 10 +; CHECK-NEXT: r3 = atomic_fetch_xor((u64 *)(r1 + 0), r3) +; CHECK-NEXT: r0 = 10 
+; CHECK-NEXT: r0 = atomic_fetch_xor((u64 *)(r1 + 0), r0) +; CHECK-NEXT: r0 += r3 +; CHECK-NEXT: r3 = 10 +; CHECK-NEXT: r3 = atomic_fetch_xor((u64 *)(r1 + 0), r3) +; CHECK-NEXT: r0 += r3 +; CHECK-NEXT: r3 = 10 +; CHECK-NEXT: r3 = atomic_fetch_xor((u64 *)(r1 + 0), r3) +; CHECK-NEXT: r0 += r3 +; CHECK-NEXT: r2 = atomic_fetch_xor((u64 *)(r1 + 0), r2) +; CHECK-NEXT: r0 += r2 +; CHECK-NEXT: exit +entry: + %0 = atomicrmw xor ptr %i, i64 10 monotonic, align 8 + %1 = atomicrmw xor ptr %i, i64 10 acquire, align 8 + %add = add nsw i64 %1, %0 + %2 = atomicrmw xor ptr %i, i64 10 release, align 8 + %add5 = add nsw i64 %add, %2 + %3 = atomicrmw xor ptr %i, i64 10 acq_rel, align 8 + %add8 = add nsw i64 %add5, %3 + %4 = atomicrmw xor ptr %i, i64 10 seq_cst, align 8 + %add11 = add nsw i64 %add8, %4 + ret i64 %add11 +} + +attributes #0 = { mustprogress nofree norecurse nounwind willreturn memory(argmem: readwrite) "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="v3" } + +!llvm.module.flags = !{!0, !1} +!llvm.ident = !{!2} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{i32 7, !"frame-pointer", i32 2} +!2 = !{!"clang version 20.0.0git (git@github.com:yonghong-song/llvm-project.git 6f71e34e194dab5a52cb2211af575c6067e9e504)"} diff --git a/llvm/test/CodeGen/BPF/atomics_sub64_relaxed_v1.ll b/llvm/test/CodeGen/BPF/atomics_sub64_relaxed_v1.ll new file mode 100644 index 0000000000000..4d630d475b296 --- /dev/null +++ b/llvm/test/CodeGen/BPF/atomics_sub64_relaxed_v1.ll @@ -0,0 +1,27 @@ +; RUN: not llc -march=bpfel -mcpu=v1 -filetype=asm < %s +; +; Source: +; $ cat atomics_sub64_relaxed_v1.c +; #include +; +; long test_fetch_sub_64_ret(long _Atomic *i) { +; return __c11_atomic_fetch_sub(i, 10, memory_order_relaxed); +; } + +target triple = "bpf" + +; Function Attrs: mustprogress nofree norecurse nounwind willreturn memory(argmem: readwrite) +define dso_local i64 @test_fetch_sub_64_ret(ptr nocapture noundef %i) local_unnamed_addr #0 { +entry: + %0 = atomicrmw sub ptr %i, i64 10 monotonic, align 8 + ret i64 %0 +} + +attributes #0 = { mustprogress nofree norecurse nounwind willreturn memory(argmem: readwrite) "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="v1" } + +!llvm.module.flags = !{!0, !1} +!llvm.ident = !{!2} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{i32 7, !"frame-pointer", i32 2} +!2 = !{!"clang version 20.0.0git (git@github.com:yonghong-song/llvm-project.git 6f71e34e194dab5a52cb2211af575c6067e9e504)"} diff --git a/llvm/test/CodeGen/BPF/xaddd_v1.ll b/llvm/test/CodeGen/BPF/xaddd_v1.ll new file mode 100644 index 0000000000000..d3bfd8d81b15b --- /dev/null +++ b/llvm/test/CodeGen/BPF/xaddd_v1.ll @@ -0,0 +1,25 @@ +; RUN: not llc -march=bpfel -mcpu=v1 -filetype=asm < %s +; +; Source: +; $ cat xaddd_v1.c +; long test_fetch_add_64_ret(long *i) { +; return __sync_fetch_and_add(i, 10); +; } + +target triple = "bpf" + +; Function Attrs: mustprogress nofree norecurse nounwind willreturn memory(argmem: readwrite) +define dso_local i64 @test_fetch_add_64_ret(ptr nocapture noundef %i) local_unnamed_addr #0 { +entry: + %0 = atomicrmw add ptr %i, i64 10 seq_cst, align 8 + ret i64 %0 +} + +attributes #0 = { mustprogress nofree norecurse nounwind willreturn memory(argmem: readwrite) "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="v1" } + +!llvm.module.flags = !{!0, !1} +!llvm.ident = !{!2} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{i32 7, !"frame-pointer", i32 2} +!2 = !{!"clang 
version 20.0.0git (git@github.com:yonghong-song/llvm-project.git 6f71e34e194dab5a52cb2211af575c6067e9e504)"} From 4a9da96dc68d878893399210888a03117b39b802 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Tue, 24 Sep 2024 15:59:33 -0700 Subject: [PATCH 057/212] [clang] Add cc1 --output-asm-variant= to set output syntax 2fcaa549a824efeb56e807fcf750a56bf985296b (2010) added cc1as option `-output-asm-variant` (untested) to set the output syntax. `clang -cc1as -filetype asm -output-asm-variant 1` allows AT&T input and Intel output (`AssemblerDialect` is also used by non-x86 targets). This patch renames the cc1as option (to avoid collision with -o) and makes it available for cc1 to set output syntax. This allows different input & output syntax: ``` echo 'asm("mov $1, %eax");' | clang -xc - -S -o - -Xclang --output-asm-variant=1 ``` Note: `AsmWriterFlavor` (with a misleading name), used to initialize MCAsmInfo::AssemblerDialect, is primarily used for assembly input, not for output. Therefore, `echo 'asm("mov $1, %eax");' | clang -x c - -mllvm --x86-asm-syntax=intel -S -o -`, which achieves a similar goal before Clang 19, was unintended. Close #109157 Pull Request: https://github.com/llvm/llvm-project/pull/109360 --- clang/include/clang/Basic/CodeGenOptions.def | 1 + clang/include/clang/Driver/Options.td | 5 ++-- clang/lib/CodeGen/BackendUtil.cpp | 2 ++ .../test/CodeGen/inline-asm-output-variant.c | 26 +++++++++++++++++++ clang/test/Misc/cc1as-output-asm-variant.c | 8 ++++++ llvm/include/llvm/MC/MCTargetOptions.h | 2 ++ llvm/lib/CodeGen/LLVMTargetMachine.cpp | 4 ++- 7 files changed, 45 insertions(+), 3 deletions(-) create mode 100644 clang/test/CodeGen/inline-asm-output-variant.c create mode 100644 clang/test/Misc/cc1as-output-asm-variant.c diff --git a/clang/include/clang/Basic/CodeGenOptions.def b/clang/include/clang/Basic/CodeGenOptions.def index b600198998d85..2893377e5a38b 100644 --- a/clang/include/clang/Basic/CodeGenOptions.def +++ b/clang/include/clang/Basic/CodeGenOptions.def @@ -96,6 +96,7 @@ CODEGENOPT(EmulatedTLS , 1, 0) ///< Set by default or -f[no-]emulated-tls. ENUM_CODEGENOPT(EmbedBitcode, EmbedBitcodeKind, 2, Embed_Off) /// Inline asm dialect, -masm=(att|intel) ENUM_CODEGENOPT(InlineAsmDialect, InlineAsmDialectKind, 1, IAD_ATT) +CODEGENOPT(OutputAsmVariant, 2, 3) ///< Set the asm variant for output (3: unspecified). CODEGENOPT(ForbidGuardVariables , 1, 0) ///< Issue errors if C++ guard variables ///< are required. CODEGENOPT(FunctionSections , 1, 0) ///< Set when -ffunction-sections is enabled. 
diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 002f60350543d..23bd686a85f52 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -7217,6 +7217,9 @@ def fuse_ctor_homing: Flag<["-"], "fuse-ctor-homing">, def as_secure_log_file : Separate<["-"], "as-secure-log-file">, HelpText<"Emit .secure_log_unique directives to this filename.">, MarshallingInfoString>; +def output_asm_variant : Joined<["--"], "output-asm-variant=">, + HelpText<"Select the asm variant (integer) to use for output (3: unspecified)">, + MarshallingInfoInt, "3">; } // let Visibility = [CC1Option, CC1AsOption] @@ -8307,8 +8310,6 @@ def filetype : Separate<["-"], "filetype">, HelpText<"Specify the output file type ('asm', 'null', or 'obj')">; // Transliterate Options -def output_asm_variant : Separate<["-"], "output-asm-variant">, - HelpText<"Select the asm variant index to use for output">; def show_encoding : Flag<["-"], "show-encoding">, HelpText<"Show instruction encoding information in transliterate mode">; def show_inst : Flag<["-"], "show-inst">, diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp index fa49763e312f1..916c92adb8930 100644 --- a/clang/lib/CodeGen/BackendUtil.cpp +++ b/clang/lib/CodeGen/BackendUtil.cpp @@ -509,6 +509,8 @@ static bool initTargetOptions(DiagnosticsEngine &Diags, Options.MCOptions.X86RelaxRelocations = CodeGenOpts.X86RelaxRelocations; Options.MCOptions.CompressDebugSections = CodeGenOpts.getCompressDebugSections(); + if (CodeGenOpts.OutputAsmVariant != 3) // 3 (default): not specified + Options.MCOptions.OutputAsmVariant = CodeGenOpts.OutputAsmVariant; Options.MCOptions.ABIName = TargetOpts.ABI; for (const auto &Entry : HSOpts.UserEntries) if (!Entry.IsFramework && diff --git a/clang/test/CodeGen/inline-asm-output-variant.c b/clang/test/CodeGen/inline-asm-output-variant.c new file mode 100644 index 0000000000000..376a876754034 --- /dev/null +++ b/clang/test/CodeGen/inline-asm-output-variant.c @@ -0,0 +1,26 @@ +// REQUIRES: x86-registered-target +/// AT&T input +// RUN: %clang_cc1 -triple x86_64 -S --output-asm-variant=0 %s -o - | FileCheck --check-prefix=ATT %s +// RUN: %clang_cc1 -triple x86_64 -S --output-asm-variant=1 %s -o - | FileCheck --check-prefix=INTEL %s + +/// Intel input +// RUN: %clang_cc1 -triple x86_64 -S -D INTEL -mllvm -x86-asm-syntax=intel -inline-asm=intel %s -o - | FileCheck --check-prefix=INTEL %s +// RUN: %clang_cc1 -triple x86_64 -S -D INTEL -mllvm -x86-asm-syntax=intel -inline-asm=intel --output-asm-variant=1 %s -o - | FileCheck --check-prefix=INTEL %s + +// ATT: movl $1, %eax +// ATT: movl $2, %eax + +// INTEL: mov eax, 1 +// INTEL: mov eax, 2 + +#ifdef INTEL +asm("mov eax, 1"); +void foo() { + asm("mov eax, 2"); +} +#else +asm("mov $1, %eax"); +void foo() { + asm("mov $2, %eax"); +} +#endif diff --git a/clang/test/Misc/cc1as-output-asm-variant.c b/clang/test/Misc/cc1as-output-asm-variant.c new file mode 100644 index 0000000000000..c287c62fc95e4 --- /dev/null +++ b/clang/test/Misc/cc1as-output-asm-variant.c @@ -0,0 +1,8 @@ +// REQUIRES: x86-registered-target +// RUN: %clang -cc1as -triple x86_64 %s -o - | FileCheck %s --check-prefix=ATT +// RUN: %clang -cc1as -triple x86_64 %s --output-asm-variant=1 -o - | FileCheck %s --check-prefix=INTEL + +// ATT: movl $1, %eax +// INTEL: mov eax, 1 + +mov $1, %eax diff --git a/llvm/include/llvm/MC/MCTargetOptions.h b/llvm/include/llvm/MC/MCTargetOptions.h index 2e2025c2e7b2c..7b0d81faf73d2 100644 --- 
a/llvm/include/llvm/MC/MCTargetOptions.h +++ b/llvm/include/llvm/MC/MCTargetOptions.h @@ -72,6 +72,8 @@ class MCTargetOptions { bool X86Sse2Avx = false; + std::optional OutputAsmVariant; + EmitDwarfUnwindType EmitDwarfUnwind; int DwarfVersion = 0; diff --git a/llvm/lib/CodeGen/LLVMTargetMachine.cpp b/llvm/lib/CodeGen/LLVMTargetMachine.cpp index 4ff22057b290f..ea36fedef93ac 100644 --- a/llvm/lib/CodeGen/LLVMTargetMachine.cpp +++ b/llvm/lib/CodeGen/LLVMTargetMachine.cpp @@ -160,7 +160,9 @@ Expected> LLVMTargetMachine::createMCStreamer( switch (FileType) { case CodeGenFileType::AssemblyFile: { MCInstPrinter *InstPrinter = getTarget().createMCInstPrinter( - getTargetTriple(), MAI.getAssemblerDialect(), MAI, MII, MRI); + getTargetTriple(), + Options.MCOptions.OutputAsmVariant.value_or(MAI.getAssemblerDialect()), + MAI, MII, MRI); // Create a code emitter if asked to show the encoding. std::unique_ptr MCE; From 02d6aad5cc940f17904c1288dfabc3fd2d439279 Mon Sep 17 00:00:00 2001 From: Teresa Johnson Date: Tue, 24 Sep 2024 16:18:48 -0700 Subject: [PATCH 058/212] [MemProf] Reduce unnecessary context id computation (NFC) (#109857) One of the memory reduction techniques was to compute node context ids on the fly. This reduced memory at the expense of some compile time increase. For a large binary we were spending a lot of time invoking getContextIds on the node during assignStackNodesPostOrder, because we were iterating through the stack ids for a call from leaf to root (first to last node in the parlance used in that code). However, all calls for a given entry in the StackIdToMatchingCalls map share the same last node, so we can borrow the approach used by similar code in updateStackNodes and compute the context ids on the last node once, then iterate each call's stack ids in reverse order while reusing the last node's context ids. This reduced the thin link time by 43% for a large target. It isn't clear why there wasn't a similar increase measured when introducing the node context id recomputation, but the compile time was longer to start with then. --- .../IPO/MemProfContextDisambiguation.cpp | 44 +++++++++++++------ 1 file changed, 30 insertions(+), 14 deletions(-) diff --git a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp index 6927fe538e367..576a31f8b86ae 100644 --- a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp +++ b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp @@ -1362,12 +1362,22 @@ void CallsiteContextGraph:: } } +#ifndef NDEBUG // Find the node for the last stack id, which should be the same // across all calls recorded for this id, and is this node's id. uint64_t LastId = Node->OrigStackOrAllocId; ContextNode *LastNode = getNodeForStackId(LastId); // We should only have kept stack ids that had nodes. assert(LastNode); + assert(LastNode == Node); +#else + ContextNode *LastNode = Node; +#endif + + // Compute the last node's context ids once, as it is shared by all calls in + // this entry. + DenseSet LastNodeContextIds = LastNode->getContextIds(); + assert(!LastNodeContextIds.empty()); for (unsigned I = 0; I < Calls.size(); I++) { auto &[Call, Ids, Func, SavedContextIds] = Calls[I]; @@ -1389,40 +1399,43 @@ void CallsiteContextGraph:: assert(LastId == Ids.back()); - ContextNode *FirstNode = getNodeForStackId(Ids[0]); - assert(FirstNode); - // Recompute the context ids for this stack id sequence (the // intersection of the context ids of the corresponding nodes). 
// Start with the ids we saved in the map for this call, which could be // duplicated context ids. We have to recompute as we might have overlap // overlap between the saved context ids for different last nodes, and // removed them already during the post order traversal. - set_intersect(SavedContextIds, FirstNode->getContextIds()); - ContextNode *PrevNode = nullptr; - for (auto Id : Ids) { + set_intersect(SavedContextIds, LastNodeContextIds); + ContextNode *PrevNode = LastNode; + bool Skip = false; + // Iterate backwards through the stack Ids, starting after the last Id + // in the list, which was handled once outside for all Calls. + for (auto IdIter = Ids.rbegin() + 1; IdIter != Ids.rend(); IdIter++) { + auto Id = *IdIter; ContextNode *CurNode = getNodeForStackId(Id); // We should only have kept stack ids that had nodes and weren't // recursive. assert(CurNode); assert(!CurNode->Recursive); - if (!PrevNode) { - PrevNode = CurNode; - continue; - } - auto *Edge = CurNode->findEdgeFromCallee(PrevNode); + + auto *Edge = CurNode->findEdgeFromCaller(PrevNode); if (!Edge) { - SavedContextIds.clear(); + Skip = true; break; } PrevNode = CurNode; + + // Update the context ids, which is the intersection of the ids along + // all edges in the sequence. set_intersect(SavedContextIds, Edge->getContextIds()); // If we now have no context ids for clone, skip this call. - if (SavedContextIds.empty()) + if (SavedContextIds.empty()) { + Skip = true; break; + } } - if (SavedContextIds.empty()) + if (Skip) continue; // Create new context node. @@ -1433,6 +1446,9 @@ void CallsiteContextGraph:: NonAllocationCallToContextNodeMap[Call] = NewNode; NewNode->AllocTypes = computeAllocType(SavedContextIds); + ContextNode *FirstNode = getNodeForStackId(Ids[0]); + assert(FirstNode); + // Connect to callees of innermost stack frame in inlined call chain. // This updates context ids for FirstNode's callee's to reflect those // moved to NewNode. From cd53c8429efc82c8756d85c23fc347901c3c948d Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Tue, 24 Sep 2024 16:22:53 -0700 Subject: [PATCH 059/212] [RISCV] Fix a warning This patch fixes: llvm/lib/Target/RISCV/RISCVISelLowering.cpp:10479:12: error: variable 'SubRegIdx' set but not used [-Werror,-Wunused-but-set-variable] --- llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 1a52f927d69f7..7b00b2514c4ef 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -10527,6 +10527,7 @@ SDValue RISCVTargetLowering::lowerEXTRACT_SUBVECTOR(SDValue Op, // we should have successfully decomposed the extract into a subregister. // We use an extract_subvector that will resolve to a subreg extract. 
assert(SubRegIdx != RISCV::NoSubRegister); + (void)SubRegIdx; unsigned Idx = OrigIdx - RemIdx.getKnownMinValue(); if (SubVecVT.isFixedLengthVector()) { assert(VLen); From 96eff99e64aaffed1cb8b378ed8f3ac09786e986 Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Tue, 24 Sep 2024 16:37:44 -0700 Subject: [PATCH 060/212] [llvm-reduce] Avoid repeated hash lookups (NFC) (#109747) --- .../llvm-reduce/deltas/ReduceDistinctMetadata.cpp | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/llvm/tools/llvm-reduce/deltas/ReduceDistinctMetadata.cpp b/llvm/tools/llvm-reduce/deltas/ReduceDistinctMetadata.cpp index 32fca80b5e5d6..02129263f6af4 100644 --- a/llvm/tools/llvm-reduce/deltas/ReduceDistinctMetadata.cpp +++ b/llvm/tools/llvm-reduce/deltas/ReduceDistinctMetadata.cpp @@ -41,10 +41,8 @@ reduceNodes(MDNode *Root, if (MDNode *Operand = dyn_cast(CurrentNode->getOperand(I).get())) { // Check whether node has been visited - if (!VisitedNodes.contains(Operand)) { + if (VisitedNodes.insert(Operand)) NodesToTraverse.push(Operand); - VisitedNodes.insert(Operand); - } // Delete the node only if it is distinct if (Operand->isDistinct()) { // Add to removal list @@ -74,10 +72,8 @@ static void cleanUpTemporaries(NamedMDNode &NamedNode, MDTuple *TemporaryTuple, // If the node hasn't been traversed yet, add it to the queue of nodes to // traverse. if (MDTuple *TupleI = dyn_cast((*I))) { - if (!VisitedNodes.contains(TupleI)) { + if (VisitedNodes.insert(TupleI)) NodesToTraverse.push(TupleI); - VisitedNodes.insert(TupleI); - } } } @@ -113,12 +109,10 @@ static void cleanUpTemporaries(NamedMDNode &NamedNode, MDTuple *TemporaryTuple, // Push the remaining nodes into the queue for (unsigned int I = 0; I < CurrentTuple->getNumOperands(); ++I) { MDTuple *Operand = dyn_cast(CurrentTuple->getOperand(I).get()); - if (Operand && !VisitedNodes.contains(Operand)) { - NodesToTraverse.push(Operand); + if (Operand && VisitedNodes.insert(Operand)) // If the node hasn't been traversed yet, add it to the queue of nodes // to traverse. - VisitedNodes.insert(Operand); - } + NodesToTraverse.push(Operand); } } } From 9a99e559322e999240184f0159993023ffb355f1 Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Tue, 24 Sep 2024 16:39:47 -0700 Subject: [PATCH 061/212] [gold-plugin] Avoid repeated hash lookups (NFC) (#109748) --- llvm/tools/gold/gold-plugin.cpp | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/llvm/tools/gold/gold-plugin.cpp b/llvm/tools/gold/gold-plugin.cpp index 0b175a3852e42..0377791d85b3f 100644 --- a/llvm/tools/gold/gold-plugin.cpp +++ b/llvm/tools/gold/gold-plugin.cpp @@ -1057,9 +1057,11 @@ static std::vector, bool>> runLTO() { getThinLTOOldAndNewSuffix(OldSuffix, NewSuffix); for (claimed_file &F : Modules) { - if (options::thinlto && !HandleToInputFile.count(F.leader_handle)) - HandleToInputFile.insert(std::make_pair( - F.leader_handle, std::make_unique(F.handle))); + if (options::thinlto) { + auto [It, Inserted] = HandleToInputFile.try_emplace(F.leader_handle); + if (Inserted) + It->second = std::make_unique(F.handle); + } // In case we are thin linking with a minimized bitcode file, ensure // the module paths encoded in the index reflect where the backends // will locate the full bitcode files for compiling/importing. 
From 2f9c9ff789a08a862024dab0626b40db604572c8 Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Tue, 24 Sep 2024 16:40:16 -0700 Subject: [PATCH 062/212] [llvm-extract] Avoid repeated hash lookups (NFC) (#109749) --- llvm/tools/llvm-extract/llvm-extract.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/llvm/tools/llvm-extract/llvm-extract.cpp b/llvm/tools/llvm-extract/llvm-extract.cpp index 4ee644f1e2906..5fc9a31ab4ad7 100644 --- a/llvm/tools/llvm-extract/llvm-extract.cpp +++ b/llvm/tools/llvm-extract/llvm-extract.cpp @@ -297,9 +297,8 @@ int main(int argc, char **argv) { Function *CF = CB->getCalledFunction(); if (!CF) continue; - if (CF->isDeclaration() || GVs.count(CF)) + if (CF->isDeclaration() || !GVs.insert(CF)) continue; - GVs.insert(CF); Workqueue.push_back(CF); } } From 4c3fccdd8831c8ef8c7191a7f68e8341c3123eb7 Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Tue, 24 Sep 2024 16:40:54 -0700 Subject: [PATCH 063/212] [llvm-ifs] Avoid repeated map lookups (NFC) (#109750) --- llvm/tools/llvm-ifs/llvm-ifs.cpp | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/llvm/tools/llvm-ifs/llvm-ifs.cpp b/llvm/tools/llvm-ifs/llvm-ifs.cpp index 169f601d459e3..b76ea8dec0c98 100644 --- a/llvm/tools/llvm-ifs/llvm-ifs.cpp +++ b/llvm/tools/llvm-ifs/llvm-ifs.cpp @@ -441,12 +441,9 @@ int llvm_ifs_main(int argc, char **argv, const llvm::ToolContext &) { } for (auto Symbol : TargetStub->Symbols) { - auto SI = SymbolMap.find(Symbol.Name); - if (SI == SymbolMap.end()) { - SymbolMap.insert( - std::pair(Symbol.Name, Symbol)); + auto [SI, Inserted] = SymbolMap.try_emplace(Symbol.Name, Symbol); + if (Inserted) continue; - } assert(Symbol.Name == SI->second.Name && "Symbol Names Must Match."); From 3cfd0c0d36975504099034ce11f4df07c5a7eba7 Mon Sep 17 00:00:00 2001 From: Alex Voicu Date: Wed, 25 Sep 2024 00:44:57 +0100 Subject: [PATCH 064/212] [SPIRV][RFC] Rework / extend support for memory scopes (#106429) This change adds support for correctly lowering the `__scoped` Clang builtins, and corresponding scoped LLVM instructions. These were previously unconditionally lowered to Device scope, which is possibly incorrect. Furthermore, the default / implicit scope is changed from Device (an OpenCL assumption) to AllSvmDevices (aka System), since the SPIR-V BE is not OpenCL specific / can ingest IR coming from other language front-ends. OpenCL defaulting to Device scope is now reflected in the front-end handling of atomic ops, which seems preferable. 
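For illustration only (not part of the patch): a minimal sketch of what the new mapping yields when a file like the one below is compiled with `-triple=spirv64-unknown-unknown`. The expected-IR comments mirror the SPIRV FileCheck lines in the updated `clang/test/CodeGen/scoped-atomic-ops.c` test further down, whose `__scoped_atomic_*` builtin calls and `__MEMORY_SCOPE_*` macro spellings this example reuses; the test itself is authoritative.

```c
// Hypothetical standalone example; the SPIRV check prefixes in
// scoped-atomic-ops.c below are the authoritative expectations.
int fetch_add_all_scopes(int *p) {
  int r = 0;
  // atomicrmw add ptr %p, i32 1 monotonic
  //   (no syncscope, i.e. LLVM system scope; the SPIR-V BE now lowers
  //    this to AllSvmDevices instead of Device)
  r += __scoped_atomic_fetch_add(p, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_SYSTEM);
  // atomicrmw add ptr %p, i32 1 syncscope("device") monotonic
  r += __scoped_atomic_fetch_add(p, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_DEVICE);
  // atomicrmw add ptr %p, i32 1 syncscope("workgroup") monotonic
  r += __scoped_atomic_fetch_add(p, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_WRKGRP);
  // atomicrmw add ptr %p, i32 1 syncscope("subgroup") monotonic
  r += __scoped_atomic_fetch_add(p, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_WVFRNT);
  // atomicrmw add ptr %p, i32 1 syncscope("singlethread") monotonic
  r += __scoped_atomic_fetch_add(p, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_SINGLE);
  return r;
}
```

The scope strings ("device", "workgroup", "subgroup", "singlethread", and the empty system scope) correspond to the `mapClangSyncScopeToLLVM` table introduced in `clang/lib/CodeGen/Targets/SPIR.cpp` by this commit.
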
--- clang/lib/Basic/Targets/SPIR.h | 6 + clang/lib/CodeGen/CGAtomic.cpp | 13 +- clang/lib/CodeGen/Targets/SPIR.cpp | 36 ++ clang/test/CodeGen/scoped-atomic-ops.c | 336 ++++++++++++------ ...atomic-builtins-default-to-device-scope.cl | 235 ++++++++++++ clang/test/Sema/scoped-atomic-ops.c | 1 + llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp | 3 +- .../Target/SPIRV/SPIRVInstructionSelector.cpp | 57 +-- llvm/lib/Target/SPIRV/SPIRVUtils.cpp | 25 ++ llvm/lib/Target/SPIRV/SPIRVUtils.h | 2 + .../CodeGen/SPIRV/AtomicCompareExchange.ll | 6 +- llvm/test/CodeGen/SPIRV/atomicrmw.ll | 25 +- .../atomicrmw_faddfsub_double.ll | 7 +- .../atomicrmw_faddfsub_float.ll | 7 +- .../atomicrmw_faddfsub_half.ll | 7 +- .../atomicrmw_fminfmax_double.ll | 7 +- .../atomicrmw_fminfmax_float.ll | 7 +- .../atomicrmw_fminfmax_half.ll | 7 +- llvm/test/CodeGen/SPIRV/fence.ll | 10 +- .../CodeGen/SPIRV/instructions/atomic-ptr.ll | 2 +- .../test/CodeGen/SPIRV/instructions/atomic.ll | 31 +- .../SPIRV/instructions/atomic_acqrel.ll | 4 +- .../CodeGen/SPIRV/instructions/atomic_seq.ll | 4 +- llvm/test/CodeGen/SPIRV/scoped_atomicrmw.ll | 163 +++++++++ 24 files changed, 775 insertions(+), 226 deletions(-) create mode 100644 clang/test/CodeGenOpenCL/atomic-builtins-default-to-device-scope.cl create mode 100644 llvm/test/CodeGen/SPIRV/scoped_atomicrmw.ll diff --git a/clang/lib/Basic/Targets/SPIR.h b/clang/lib/Basic/Targets/SPIR.h index 37cf9d7921bac..8a26db7971cba 100644 --- a/clang/lib/Basic/Targets/SPIR.h +++ b/clang/lib/Basic/Targets/SPIR.h @@ -335,6 +335,9 @@ class LLVM_LIBRARY_VISIBILITY SPIRV32TargetInfo : public BaseSPIRVTargetInfo { PointerWidth = PointerAlign = 32; SizeType = TargetInfo::UnsignedInt; PtrDiffType = IntPtrType = TargetInfo::SignedInt; + // SPIR-V has core support for atomic ops, and Int32 is always available; + // we take the maximum because it's possible the Host supports wider types. + MaxAtomicInlineWidth = std::max(MaxAtomicInlineWidth, 32); resetDataLayout("e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-" "v96:128-v192:256-v256:256-v512:512-v1024:1024-G1"); } @@ -356,6 +359,9 @@ class LLVM_LIBRARY_VISIBILITY SPIRV64TargetInfo : public BaseSPIRVTargetInfo { PointerWidth = PointerAlign = 64; SizeType = TargetInfo::UnsignedLong; PtrDiffType = IntPtrType = TargetInfo::SignedLong; + // SPIR-V has core support for atomic ops, and Int64 is always available; + // we take the maximum because it's possible the Host supports wider types. + MaxAtomicInlineWidth = std::max(MaxAtomicInlineWidth, 64); resetDataLayout("e-i64:64-v16:16-v24:32-v32:32-v48:64-" "v96:128-v192:256-v256:256-v512:512-v1024:1024-G1"); } diff --git a/clang/lib/CodeGen/CGAtomic.cpp b/clang/lib/CodeGen/CGAtomic.cpp index fbe9569e50ef6..a2a87e012b8b2 100644 --- a/clang/lib/CodeGen/CGAtomic.cpp +++ b/clang/lib/CodeGen/CGAtomic.cpp @@ -766,8 +766,19 @@ static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *Expr, Address Dest, // LLVM atomic instructions always have synch scope. If clang atomic // expression has no scope operand, use default LLVM synch scope. if (!ScopeModel) { + llvm::SyncScope::ID SS; + if (CGF.getLangOpts().OpenCL) + // OpenCL approach is: "The functions that do not have memory_scope + // argument have the same semantics as the corresponding functions with + // the memory_scope argument set to memory_scope_device." 
See ref.: + // https://registry.khronos.org/OpenCL/specs/3.0-unified/html/OpenCL_C.html#atomic-functions + SS = CGF.getTargetHooks().getLLVMSyncScopeID(CGF.getLangOpts(), + SyncScope::OpenCLDevice, + Order, CGF.getLLVMContext()); + else + SS = llvm::SyncScope::System; EmitAtomicOp(CGF, Expr, Dest, Ptr, Val1, Val2, IsWeak, FailureOrder, Size, - Order, CGF.CGM.getLLVMContext().getOrInsertSyncScopeID("")); + Order, SS); return; } diff --git a/clang/lib/CodeGen/Targets/SPIR.cpp b/clang/lib/CodeGen/Targets/SPIR.cpp index cc52925e2e523..d5e8e4f7a5916 100644 --- a/clang/lib/CodeGen/Targets/SPIR.cpp +++ b/clang/lib/CodeGen/Targets/SPIR.cpp @@ -58,7 +58,36 @@ class SPIRVTargetCodeGenInfo : public CommonSPIRTargetCodeGenInfo { SPIRVTargetCodeGenInfo(CodeGen::CodeGenTypes &CGT) : CommonSPIRTargetCodeGenInfo(std::make_unique(CGT)) {} void setCUDAKernelCallingConvention(const FunctionType *&FT) const override; + llvm::SyncScope::ID getLLVMSyncScopeID(const LangOptions &LangOpts, + SyncScope Scope, + llvm::AtomicOrdering Ordering, + llvm::LLVMContext &Ctx) const override; }; + +inline StringRef mapClangSyncScopeToLLVM(SyncScope Scope) { + switch (Scope) { + case SyncScope::HIPSingleThread: + case SyncScope::SingleScope: + return "singlethread"; + case SyncScope::HIPWavefront: + case SyncScope::OpenCLSubGroup: + case SyncScope::WavefrontScope: + return "subgroup"; + case SyncScope::HIPWorkgroup: + case SyncScope::OpenCLWorkGroup: + case SyncScope::WorkgroupScope: + return "workgroup"; + case SyncScope::HIPAgent: + case SyncScope::OpenCLDevice: + case SyncScope::DeviceScope: + return "device"; + case SyncScope::SystemScope: + case SyncScope::HIPSystem: + case SyncScope::OpenCLAllSVMDevices: + return ""; + } + return ""; +} } // End anonymous namespace. void CommonSPIRABIInfo::setCCs() { @@ -188,6 +217,13 @@ void SPIRVTargetCodeGenInfo::setCUDAKernelCallingConvention( } } +llvm::SyncScope::ID +SPIRVTargetCodeGenInfo::getLLVMSyncScopeID(const LangOptions &, SyncScope Scope, + llvm::AtomicOrdering, + llvm::LLVMContext &Ctx) const { + return Ctx.getOrInsertSyncScopeID(mapClangSyncScopeToLLVM(Scope)); +} + /// Construct a SPIR-V target extension type for the given OpenCL image type. 
static llvm::Type *getSPIRVImageType(llvm::LLVMContext &Ctx, StringRef BaseType, StringRef OpenCLName, diff --git a/clang/test/CodeGen/scoped-atomic-ops.c b/clang/test/CodeGen/scoped-atomic-ops.c index b0032046639b8..cf98812a07e91 100644 --- a/clang/test/CodeGen/scoped-atomic-ops.c +++ b/clang/test/CodeGen/scoped-atomic-ops.c @@ -1,12 +1,21 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5 // RUN: %clang_cc1 %s -emit-llvm -o - -triple=amdgcn-amd-amdhsa -ffreestanding \ -// RUN: -fvisibility=hidden | FileCheck %s +// RUN: -fvisibility=hidden | FileCheck --check-prefix=AMDGCN %s +// RUN: %clang_cc1 %s -emit-llvm -o - -triple=spirv64-unknown-unknown -ffreestanding \ +// RUN: -fvisibility=hidden | FileCheck --check-prefix=SPIRV %s -// CHECK-LABEL: define hidden i32 @fi1a( -// CHECK: [[TMP0:%.*]] = load atomic i32, ptr [[PTR0:.+]] syncscope("one-as") monotonic, align 4 -// CHECK: [[TMP1:%.*]] = load atomic i32, ptr [[PTR1:.+]] syncscope("agent-one-as") monotonic, align 4 -// CHECK: [[TMP2:%.*]] = load atomic i32, ptr [[PTR2:.+]] syncscope("workgroup-one-as") monotonic, align 4 -// CHECK: [[TMP3:%.*]] = load atomic i32, ptr [[PTR3:.+]] syncscope("wavefront-one-as") monotonic, align 4 -// CHECK: [[TMP4:%.*]] = load atomic i32, ptr [[PTR4:.+]] syncscope("singlethread-one-as") monotonic, align 4 +// AMDGCN-LABEL: define hidden i32 @fi1a( +// AMDGCN: [[TMP0:%.*]] = load atomic i32, ptr [[PTR0:.+]] syncscope("one-as") monotonic, align 4 +// AMDGCN: [[TMP1:%.*]] = load atomic i32, ptr [[PTR1:.+]] syncscope("agent-one-as") monotonic, align 4 +// AMDGCN: [[TMP2:%.*]] = load atomic i32, ptr [[PTR2:.+]] syncscope("workgroup-one-as") monotonic, align 4 +// AMDGCN: [[TMP3:%.*]] = load atomic i32, ptr [[PTR3:.+]] syncscope("wavefront-one-as") monotonic, align 4 +// AMDGCN: [[TMP4:%.*]] = load atomic i32, ptr [[PTR4:.+]] syncscope("singlethread-one-as") monotonic, align 4 +// SPIRV: define hidden spir_func i32 @fi1a( +// SPIRV: [[TMP0:%.*]] = load atomic i32, ptr [[PTR0:.+]] monotonic, align 4 +// SPIRV: [[TMP1:%.*]] = load atomic i32, ptr [[PTR1:.+]] syncscope("device") monotonic, align 4 +// SPIRV: [[TMP2:%.*]] = load atomic i32, ptr [[PTR2:.+]] syncscope("workgroup") monotonic, align 4 +// SPIRV: [[TMP3:%.*]] = load atomic i32, ptr [[PTR3:.+]] syncscope("subgroup") monotonic, align 4 +// SPIRV: [[TMP4:%.*]] = load atomic i32, ptr [[PTR4:.+]] syncscope("singlethread") monotonic, align 4 int fi1a(int *i) { int v; __scoped_atomic_load(i, &v, __ATOMIC_RELAXED, __MEMORY_SCOPE_SYSTEM); @@ -17,13 +26,18 @@ int fi1a(int *i) { return v; } -// CHECK-LABEL: define hidden i32 @fi1b( -// CHECK: [[TMP0:%.*]] = load atomic i32, ptr [[PTR0:%.+]] syncscope("one-as") monotonic, align 4 -// CHECK: [[TMP1:%.*]] = load atomic i32, ptr [[PTR1:%.+]] syncscope("agent-one-as") monotonic, align 4 -// CHECK: [[TMP2:%.*]] = load atomic i32, ptr [[PTR2:%.+]] syncscope("workgroup-one-as") monotonic, align 4 -// CHECK: [[TMP3:%.*]] = load atomic i32, ptr [[PTR3:%.+]] syncscope("wavefront-one-as") monotonic, align 4 -// CHECK: [[TMP4:%.*]] = load atomic i32, ptr [[PTR4:%.+]] syncscope("singlethread-one-as") monotonic, align 4 -// +// AMDGCN-LABEL: define hidden i32 @fi1b( +// AMDGCN: [[TMP0:%.*]] = load atomic i32, ptr [[PTR0:%.+]] syncscope("one-as") monotonic, align 4 +// AMDGCN: [[TMP1:%.*]] = load atomic i32, ptr [[PTR1:%.+]] syncscope("agent-one-as") monotonic, align 4 +// AMDGCN: [[TMP2:%.*]] = load atomic i32, ptr [[PTR2:%.+]] syncscope("workgroup-one-as") monotonic, align 4 
+// AMDGCN: [[TMP3:%.*]] = load atomic i32, ptr [[PTR3:%.+]] syncscope("wavefront-one-as") monotonic, align 4 +// AMDGCN: [[TMP4:%.*]] = load atomic i32, ptr [[PTR4:%.+]] syncscope("singlethread-one-as") monotonic, align 4 +// SPIRV-LABEL: define hidden spir_func i32 @fi1b( +// SPIRV: [[TMP0:%.*]] = load atomic i32, ptr [[PTR0:%.+]] monotonic, align 4 +// SPIRV: [[TMP1:%.*]] = load atomic i32, ptr [[PTR1:%.+]] syncscope("device") monotonic, align 4 +// SPIRV: [[TMP2:%.*]] = load atomic i32, ptr [[PTR2:%.+]] syncscope("workgroup") monotonic, align 4 +// SPIRV: [[TMP3:%.*]] = load atomic i32, ptr [[PTR3:%.+]] syncscope("subgroup") monotonic, align 4 +// SPIRV: [[TMP4:%.*]] = load atomic i32, ptr [[PTR4:%.+]] syncscope("singlethread") monotonic, align 4 int fi1b(int *i) { *i = __scoped_atomic_load_n(i, __ATOMIC_RELAXED, __MEMORY_SCOPE_SYSTEM); *i = __scoped_atomic_load_n(i, __ATOMIC_RELAXED, __MEMORY_SCOPE_DEVICE); @@ -33,13 +47,18 @@ int fi1b(int *i) { return *i; } -// CHECK-LABEL: define hidden void @fi2a( -// CHECK: store atomic i32 [[TMP0:%.+]], ptr [[PTR0:%.+]] syncscope("one-as") monotonic, align 4 -// CHECK: store atomic i32 [[TMP1:%.+]], ptr [[PTR1:%.+]] syncscope("agent-one-as") monotonic, align 4 -// CHECK: store atomic i32 [[TMP2:%.+]], ptr [[PTR2:%.+]] syncscope("workgroup-one-as") monotonic, align 4 -// CHECK: store atomic i32 [[TMP3:%.+]], ptr [[PTR3:%.+]] syncscope("wavefront-one-as") monotonic, align 4 -// CHECK: store atomic i32 [[TMP4:%.+]], ptr [[PTR4:%.+]] syncscope("singlethread-one-as") monotonic, align 4 -// +// AMDGCN-LABEL: define hidden void @fi2a( +// AMDGCN: store atomic i32 [[TMP0:%.+]], ptr [[PTR0:%.+]] syncscope("one-as") monotonic, align 4 +// AMDGCN: store atomic i32 [[TMP1:%.+]], ptr [[PTR1:%.+]] syncscope("agent-one-as") monotonic, align 4 +// AMDGCN: store atomic i32 [[TMP2:%.+]], ptr [[PTR2:%.+]] syncscope("workgroup-one-as") monotonic, align 4 +// AMDGCN: store atomic i32 [[TMP3:%.+]], ptr [[PTR3:%.+]] syncscope("wavefront-one-as") monotonic, align 4 +// AMDGCN: store atomic i32 [[TMP4:%.+]], ptr [[PTR4:%.+]] syncscope("singlethread-one-as") monotonic, align 4 +// SPIRV-LABEL: define hidden spir_func void @fi2a( +// SPIRV: store atomic i32 [[TMP0:%.+]], ptr [[PTR0:%.+]] monotonic, align 4 +// SPIRV: store atomic i32 [[TMP1:%.+]], ptr [[PTR1:%.+]] syncscope("device") monotonic, align 4 +// SPIRV: store atomic i32 [[TMP2:%.+]], ptr [[PTR2:%.+]] syncscope("workgroup") monotonic, align 4 +// SPIRV: store atomic i32 [[TMP3:%.+]], ptr [[PTR3:%.+]] syncscope("subgroup") monotonic, align 4 +// SPIRV: store atomic i32 [[TMP4:%.+]], ptr [[PTR4:%.+]] syncscope("singlethread") monotonic, align 4 void fi2a(int *i) { int v = 1; __scoped_atomic_store(i, &v, __ATOMIC_RELAXED, __MEMORY_SCOPE_SYSTEM); @@ -49,12 +68,18 @@ void fi2a(int *i) { __scoped_atomic_store(i, &v, __ATOMIC_RELAXED, __MEMORY_SCOPE_SINGLE); } -// CHECK-LABEL: define hidden void @fi2b( -// CHECK: store atomic i32 [[TMP0:%.+]], ptr [[PTR0:%.+]] syncscope("one-as") monotonic, align 4 -// CHECK: store atomic i32 [[TMP1:%.+]], ptr [[PTR1:%.+]] syncscope("agent-one-as") monotonic, align 4 -// CHECK: store atomic i32 [[TMP2:%.+]], ptr [[PTR2:%.+]] syncscope("workgroup-one-as") monotonic, align 4 -// CHECK: store atomic i32 [[TMP3:%.+]], ptr [[PTR3:%.+]] syncscope("wavefront-one-as") monotonic, align 4 -// CHECK: store atomic i32 [[TMP4:%.+]], ptr [[PTR4:%.+]] syncscope("singlethread-one-as") monotonic, align 4 +// AMDGCN-LABEL: define hidden void @fi2b( +// AMDGCN: store atomic i32 [[TMP0:%.+]], ptr 
[[PTR0:%.+]] syncscope("one-as") monotonic, align 4 +// AMDGCN: store atomic i32 [[TMP1:%.+]], ptr [[PTR1:%.+]] syncscope("agent-one-as") monotonic, align 4 +// AMDGCN: store atomic i32 [[TMP2:%.+]], ptr [[PTR2:%.+]] syncscope("workgroup-one-as") monotonic, align 4 +// AMDGCN: store atomic i32 [[TMP3:%.+]], ptr [[PTR3:%.+]] syncscope("wavefront-one-as") monotonic, align 4 +// AMDGCN: store atomic i32 [[TMP4:%.+]], ptr [[PTR4:%.+]] syncscope("singlethread-one-as") monotonic, align 4 +// SPIRV-LABEL: define hidden spir_func void @fi2b( +// SPIRV: store atomic i32 [[TMP0:%.+]], ptr [[PTR0:%.+]] monotonic, align 4 +// SPIRV: store atomic i32 [[TMP1:%.+]], ptr [[PTR1:%.+]] syncscope("device") monotonic, align 4 +// SPIRV: store atomic i32 [[TMP2:%.+]], ptr [[PTR2:%.+]] syncscope("workgroup") monotonic, align 4 +// SPIRV: store atomic i32 [[TMP3:%.+]], ptr [[PTR3:%.+]] syncscope("subgroup") monotonic, align 4 +// SPIRV: store atomic i32 [[TMP4:%.+]], ptr [[PTR4:%.+]] syncscope("singlethread") monotonic, align 4 void fi2b(int *i) { __scoped_atomic_store_n(i, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_SYSTEM); __scoped_atomic_store_n(i, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_DEVICE); @@ -63,15 +88,24 @@ void fi2b(int *i) { __scoped_atomic_store_n(i, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_SINGLE); } -// CHECK-LABEL: define hidden void @fi3a( -// CHECK: [[TMP0:%.*]] = atomicrmw add ptr [[PTR0:%.+]], i32 [[VAL0:.+]] syncscope("one-as") monotonic, align 4 -// CHECK: [[TMP1:%.*]] = atomicrmw sub ptr [[PTR1:%.+]], i32 [[VAL1:.+]] syncscope("one-as") monotonic, align 4 -// CHECK: [[TMP2:%.*]] = atomicrmw and ptr [[PTR2:%.+]], i32 [[VAL2:.+]] syncscope("one-as") monotonic, align 4 -// CHECK: [[TMP3:%.*]] = atomicrmw or ptr [[PTR3:%.+]], i32 [[VAL3:.+]] syncscope("one-as") monotonic, align 4 -// CHECK: [[TMP4:%.*]] = atomicrmw xor ptr [[PTR4:%.+]], i32 [[VAL4:.+]] syncscope("one-as") monotonic, align 4 -// CHECK: [[TMP5:%.*]] = atomicrmw nand ptr [[PTR5:%.+]], i32 [[VAL5:.+]] syncscope("one-as") monotonic, align 4 -// CHECK: [[TMP6:%.*]] = atomicrmw min ptr [[PTR6:%.+]], i32 [[VAL6:.+]] syncscope("one-as") monotonic, align 4 -// CHECK: [[TMP7:%.*]] = atomicrmw max ptr [[PTR7:%.+]], i32 [[VAL7:.+]] syncscope("one-as") monotonic, align 4 +// AMDGCN-LABEL: define hidden void @fi3a( +// AMDGCN: [[TMP0:%.*]] = atomicrmw add ptr [[PTR0:%.+]], i32 [[VAL0:.+]] syncscope("one-as") monotonic, align 4 +// AMDGCN: [[TMP1:%.*]] = atomicrmw sub ptr [[PTR1:%.+]], i32 [[VAL1:.+]] syncscope("one-as") monotonic, align 4 +// AMDGCN: [[TMP2:%.*]] = atomicrmw and ptr [[PTR2:%.+]], i32 [[VAL2:.+]] syncscope("one-as") monotonic, align 4 +// AMDGCN: [[TMP3:%.*]] = atomicrmw or ptr [[PTR3:%.+]], i32 [[VAL3:.+]] syncscope("one-as") monotonic, align 4 +// AMDGCN: [[TMP4:%.*]] = atomicrmw xor ptr [[PTR4:%.+]], i32 [[VAL4:.+]] syncscope("one-as") monotonic, align 4 +// AMDGCN: [[TMP5:%.*]] = atomicrmw nand ptr [[PTR5:%.+]], i32 [[VAL5:.+]] syncscope("one-as") monotonic, align 4 +// AMDGCN: [[TMP6:%.*]] = atomicrmw min ptr [[PTR6:%.+]], i32 [[VAL6:.+]] syncscope("one-as") monotonic, align 4 +// AMDGCN: [[TMP7:%.*]] = atomicrmw max ptr [[PTR7:%.+]], i32 [[VAL7:.+]] syncscope("one-as") monotonic, align 4 +// SPIRV-LABEL: define hidden spir_func void @fi3a( +// SPIRV: [[TMP0:%.*]] = atomicrmw add ptr [[PTR0:%.+]], i32 [[VAL0:.+]] monotonic, align 4 +// SPIRV: [[TMP1:%.*]] = atomicrmw sub ptr [[PTR1:%.+]], i32 [[VAL1:.+]] monotonic, align 4 +// SPIRV: [[TMP2:%.*]] = atomicrmw and ptr [[PTR2:%.+]], i32 [[VAL2:.+]] monotonic, align 4 +// SPIRV: 
[[TMP3:%.*]] = atomicrmw or ptr [[PTR3:%.+]], i32 [[VAL3:.+]] monotonic, align 4 +// SPIRV: [[TMP4:%.*]] = atomicrmw xor ptr [[PTR4:%.+]], i32 [[VAL4:.+]] monotonic, align 4 +// SPIRV: [[TMP5:%.*]] = atomicrmw nand ptr [[PTR5:%.+]], i32 [[VAL5:.+]] monotonic, align 4 +// SPIRV: [[TMP6:%.*]] = atomicrmw min ptr [[PTR6:%.+]], i32 [[VAL6:.+]] monotonic, align 4 +// SPIRV: [[TMP7:%.*]] = atomicrmw max ptr [[PTR7:%.+]], i32 [[VAL7:.+]] monotonic, align 4 void fi3a(int *a, int *b, int *c, int *d, int *e, int *f, int *g, int *h) { *a = __scoped_atomic_fetch_add(a, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_SYSTEM); *b = __scoped_atomic_fetch_sub(b, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_SYSTEM); @@ -83,15 +117,24 @@ void fi3a(int *a, int *b, int *c, int *d, int *e, int *f, int *g, int *h) { *h = __scoped_atomic_fetch_max(h, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_SYSTEM); } -// CHECK-LABEL: define hidden void @fi3b( -// CHECK: [[TMP0:%.*]] = atomicrmw add ptr [[PTR0:%.+]], i32 [[VAL0:.+]] syncscope("agent-one-as") monotonic, align 4 -// CHECK: [[TMP1:%.*]] = atomicrmw sub ptr [[PTR1:%.+]], i32 [[VAL1:.+]] syncscope("agent-one-as") monotonic, align 4 -// CHECK: [[TMP2:%.*]] = atomicrmw and ptr [[PTR2:%.+]], i32 [[VAL2:.+]] syncscope("agent-one-as") monotonic, align 4 -// CHECK: [[TMP3:%.*]] = atomicrmw or ptr [[PTR3:%.+]], i32 [[VAL3:.+]] syncscope("agent-one-as") monotonic, align 4 -// CHECK: [[TMP4:%.*]] = atomicrmw xor ptr [[PTR4:%.+]], i32 [[VAL4:.+]] syncscope("agent-one-as") monotonic, align 4 -// CHECK: [[TMP5:%.*]] = atomicrmw nand ptr [[PTR5:%.+]], i32 [[VAL5:.+]] syncscope("agent-one-as") monotonic, align 4 -// CHECK: [[TMP6:%.*]] = atomicrmw min ptr [[PTR6:%.+]], i32 [[VAL6:.+]] syncscope("agent-one-as") monotonic, align 4 -// CHECK: [[TMP7:%.*]] = atomicrmw max ptr [[PTR7:%.+]], i32 [[VAL7:.+]] syncscope("agent-one-as") monotonic, align 4 +// AMDGCN-LABEL: define hidden void @fi3b( +// AMDGCN: [[TMP0:%.*]] = atomicrmw add ptr [[PTR0:%.+]], i32 [[VAL0:.+]] syncscope("agent-one-as") monotonic, align 4 +// AMDGCN: [[TMP1:%.*]] = atomicrmw sub ptr [[PTR1:%.+]], i32 [[VAL1:.+]] syncscope("agent-one-as") monotonic, align 4 +// AMDGCN: [[TMP2:%.*]] = atomicrmw and ptr [[PTR2:%.+]], i32 [[VAL2:.+]] syncscope("agent-one-as") monotonic, align 4 +// AMDGCN: [[TMP3:%.*]] = atomicrmw or ptr [[PTR3:%.+]], i32 [[VAL3:.+]] syncscope("agent-one-as") monotonic, align 4 +// AMDGCN: [[TMP4:%.*]] = atomicrmw xor ptr [[PTR4:%.+]], i32 [[VAL4:.+]] syncscope("agent-one-as") monotonic, align 4 +// AMDGCN: [[TMP5:%.*]] = atomicrmw nand ptr [[PTR5:%.+]], i32 [[VAL5:.+]] syncscope("agent-one-as") monotonic, align 4 +// AMDGCN: [[TMP6:%.*]] = atomicrmw min ptr [[PTR6:%.+]], i32 [[VAL6:.+]] syncscope("agent-one-as") monotonic, align 4 +// AMDGCN: [[TMP7:%.*]] = atomicrmw max ptr [[PTR7:%.+]], i32 [[VAL7:.+]] syncscope("agent-one-as") monotonic, align 4 +// SPIRV-LABEL: define hidden spir_func void @fi3b( +// SPIRV: [[TMP0:%.*]] = atomicrmw add ptr [[PTR0:%.+]], i32 [[VAL0:.+]] syncscope("device") monotonic, align 4 +// SPIRV: [[TMP1:%.*]] = atomicrmw sub ptr [[PTR1:%.+]], i32 [[VAL1:.+]] syncscope("device") monotonic, align 4 +// SPIRV: [[TMP2:%.*]] = atomicrmw and ptr [[PTR2:%.+]], i32 [[VAL2:.+]] syncscope("device") monotonic, align 4 +// SPIRV: [[TMP3:%.*]] = atomicrmw or ptr [[PTR3:%.+]], i32 [[VAL3:.+]] syncscope("device") monotonic, align 4 +// SPIRV: [[TMP4:%.*]] = atomicrmw xor ptr [[PTR4:%.+]], i32 [[VAL4:.+]] syncscope("device") monotonic, align 4 +// SPIRV: [[TMP5:%.*]] = atomicrmw nand ptr [[PTR5:%.+]], i32 
[[VAL5:.+]] syncscope("device") monotonic, align 4 +// SPIRV: [[TMP6:%.*]] = atomicrmw min ptr [[PTR6:%.+]], i32 [[VAL6:.+]] syncscope("device") monotonic, align 4 +// SPIRV: [[TMP7:%.*]] = atomicrmw max ptr [[PTR7:%.+]], i32 [[VAL7:.+]] syncscope("device") monotonic, align 4 void fi3b(int *a, int *b, int *c, int *d, int *e, int *f, int *g, int *h) { *a = __scoped_atomic_fetch_add(a, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_DEVICE); *b = __scoped_atomic_fetch_sub(b, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_DEVICE); @@ -103,15 +146,24 @@ void fi3b(int *a, int *b, int *c, int *d, int *e, int *f, int *g, int *h) { *h = __scoped_atomic_fetch_max(h, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_DEVICE); } -// CHECK-LABEL: define hidden void @fi3c( -// CHECK: [[TMP0:%.*]] = atomicrmw add ptr [[PTR0:%.+]], i32 [[VAL0:.+]] syncscope("workgroup-one-as") monotonic, align 4 -// CHECK: [[TMP1:%.*]] = atomicrmw sub ptr [[PTR1:%.+]], i32 [[VAL1:.+]] syncscope("workgroup-one-as") monotonic, align 4 -// CHECK: [[TMP2:%.*]] = atomicrmw and ptr [[PTR2:%.+]], i32 [[VAL2:.+]] syncscope("workgroup-one-as") monotonic, align 4 -// CHECK: [[TMP3:%.*]] = atomicrmw or ptr [[PTR3:%.+]], i32 [[VAL3:.+]] syncscope("workgroup-one-as") monotonic, align 4 -// CHECK: [[TMP4:%.*]] = atomicrmw xor ptr [[PTR4:%.+]], i32 [[VAL4:.+]] syncscope("workgroup-one-as") monotonic, align 4 -// CHECK: [[TMP5:%.*]] = atomicrmw nand ptr [[PTR5:%.+]], i32 [[VAL5:.+]] syncscope("workgroup-one-as") monotonic, align 4 -// CHECK: [[TMP6:%.*]] = atomicrmw min ptr [[PTR6:%.+]], i32 [[VAL6:.+]] syncscope("workgroup-one-as") monotonic, align 4 -// CHECK: [[TMP7:%.*]] = atomicrmw max ptr [[PTR7:%.+]], i32 [[VAL7:.+]] syncscope("workgroup-one-as") monotonic, align 4 +// AMDGCN-LABEL: define hidden void @fi3c( +// AMDGCN: [[TMP0:%.*]] = atomicrmw add ptr [[PTR0:%.+]], i32 [[VAL0:.+]] syncscope("workgroup-one-as") monotonic, align 4 +// AMDGCN: [[TMP1:%.*]] = atomicrmw sub ptr [[PTR1:%.+]], i32 [[VAL1:.+]] syncscope("workgroup-one-as") monotonic, align 4 +// AMDGCN: [[TMP2:%.*]] = atomicrmw and ptr [[PTR2:%.+]], i32 [[VAL2:.+]] syncscope("workgroup-one-as") monotonic, align 4 +// AMDGCN: [[TMP3:%.*]] = atomicrmw or ptr [[PTR3:%.+]], i32 [[VAL3:.+]] syncscope("workgroup-one-as") monotonic, align 4 +// AMDGCN: [[TMP4:%.*]] = atomicrmw xor ptr [[PTR4:%.+]], i32 [[VAL4:.+]] syncscope("workgroup-one-as") monotonic, align 4 +// AMDGCN: [[TMP5:%.*]] = atomicrmw nand ptr [[PTR5:%.+]], i32 [[VAL5:.+]] syncscope("workgroup-one-as") monotonic, align 4 +// AMDGCN: [[TMP6:%.*]] = atomicrmw min ptr [[PTR6:%.+]], i32 [[VAL6:.+]] syncscope("workgroup-one-as") monotonic, align 4 +// AMDGCN: [[TMP7:%.*]] = atomicrmw max ptr [[PTR7:%.+]], i32 [[VAL7:.+]] syncscope("workgroup-one-as") monotonic, align 4 +// SPIRV-LABEL: define hidden spir_func void @fi3c( +// SPIRV: [[TMP0:%.*]] = atomicrmw add ptr [[PTR0:%.+]], i32 [[VAL0:.+]] syncscope("workgroup") monotonic, align 4 +// SPIRV: [[TMP1:%.*]] = atomicrmw sub ptr [[PTR1:%.+]], i32 [[VAL1:.+]] syncscope("workgroup") monotonic, align 4 +// SPIRV: [[TMP2:%.*]] = atomicrmw and ptr [[PTR2:%.+]], i32 [[VAL2:.+]] syncscope("workgroup") monotonic, align 4 +// SPIRV: [[TMP3:%.*]] = atomicrmw or ptr [[PTR3:%.+]], i32 [[VAL3:.+]] syncscope("workgroup") monotonic, align 4 +// SPIRV: [[TMP4:%.*]] = atomicrmw xor ptr [[PTR4:%.+]], i32 [[VAL4:.+]] syncscope("workgroup") monotonic, align 4 +// SPIRV: [[TMP5:%.*]] = atomicrmw nand ptr [[PTR5:%.+]], i32 [[VAL5:.+]] syncscope("workgroup") monotonic, align 4 +// SPIRV: [[TMP6:%.*]] = atomicrmw min ptr 
[[PTR6:%.+]], i32 [[VAL6:.+]] syncscope("workgroup") monotonic, align 4 +// SPIRV: [[TMP7:%.*]] = atomicrmw max ptr [[PTR7:%.+]], i32 [[VAL7:.+]] syncscope("workgroup") monotonic, align 4 void fi3c(int *a, int *b, int *c, int *d, int *e, int *f, int *g, int *h) { *a = __scoped_atomic_fetch_add(a, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_WRKGRP); *b = __scoped_atomic_fetch_sub(b, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_WRKGRP); @@ -123,15 +175,24 @@ void fi3c(int *a, int *b, int *c, int *d, int *e, int *f, int *g, int *h) { *h = __scoped_atomic_fetch_max(h, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_WRKGRP); } -// CHECK-LABEL: define hidden void @fi3d( -// CHECK: [[TMP0:%.*]] = atomicrmw add ptr [[PTR0:%.+]], i32 [[VAL0:.+]] syncscope("wavefront-one-as") monotonic, align 4 -// CHECK: [[TMP1:%.*]] = atomicrmw sub ptr [[PTR1:%.+]], i32 [[VAL1:.+]] syncscope("wavefront-one-as") monotonic, align 4 -// CHECK: [[TMP2:%.*]] = atomicrmw and ptr [[PTR2:%.+]], i32 [[VAL2:.+]] syncscope("wavefront-one-as") monotonic, align 4 -// CHECK: [[TMP3:%.*]] = atomicrmw or ptr [[PTR3:%.+]], i32 [[VAL3:.+]] syncscope("wavefront-one-as") monotonic, align 4 -// CHECK: [[TMP4:%.*]] = atomicrmw xor ptr [[PTR4:%.+]], i32 [[VAL4:.+]] syncscope("wavefront-one-as") monotonic, align 4 -// CHECK: [[TMP5:%.*]] = atomicrmw nand ptr [[PTR5:%.+]], i32 [[VAL5:.+]] syncscope("wavefront-one-as") monotonic, align 4 -// CHECK: [[TMP6:%.*]] = atomicrmw min ptr [[PTR6:%.+]], i32 [[VAL6:.+]] syncscope("wavefront-one-as") monotonic, align 4 -// CHECK: [[TMP7:%.*]] = atomicrmw max ptr [[PTR7:%.+]], i32 [[VAL7:.+]] syncscope("wavefront-one-as") monotonic, align 4 +// AMDGCN-LABEL: define hidden void @fi3d( +// AMDGCN: [[TMP0:%.*]] = atomicrmw add ptr [[PTR0:%.+]], i32 [[VAL0:.+]] syncscope("wavefront-one-as") monotonic, align 4 +// AMDGCN: [[TMP1:%.*]] = atomicrmw sub ptr [[PTR1:%.+]], i32 [[VAL1:.+]] syncscope("wavefront-one-as") monotonic, align 4 +// AMDGCN: [[TMP2:%.*]] = atomicrmw and ptr [[PTR2:%.+]], i32 [[VAL2:.+]] syncscope("wavefront-one-as") monotonic, align 4 +// AMDGCN: [[TMP3:%.*]] = atomicrmw or ptr [[PTR3:%.+]], i32 [[VAL3:.+]] syncscope("wavefront-one-as") monotonic, align 4 +// AMDGCN: [[TMP4:%.*]] = atomicrmw xor ptr [[PTR4:%.+]], i32 [[VAL4:.+]] syncscope("wavefront-one-as") monotonic, align 4 +// AMDGCN: [[TMP5:%.*]] = atomicrmw nand ptr [[PTR5:%.+]], i32 [[VAL5:.+]] syncscope("wavefront-one-as") monotonic, align 4 +// AMDGCN: [[TMP6:%.*]] = atomicrmw min ptr [[PTR6:%.+]], i32 [[VAL6:.+]] syncscope("wavefront-one-as") monotonic, align 4 +// AMDGCN: [[TMP7:%.*]] = atomicrmw max ptr [[PTR7:%.+]], i32 [[VAL7:.+]] syncscope("wavefront-one-as") monotonic, align 4 +// SPIRV-LABEL: define hidden spir_func void @fi3d( +// SPIRV: [[TMP0:%.*]] = atomicrmw add ptr [[PTR0:%.+]], i32 [[VAL0:.+]] syncscope("subgroup") monotonic, align 4 +// SPIRV: [[TMP1:%.*]] = atomicrmw sub ptr [[PTR1:%.+]], i32 [[VAL1:.+]] syncscope("subgroup") monotonic, align 4 +// SPIRV: [[TMP2:%.*]] = atomicrmw and ptr [[PTR2:%.+]], i32 [[VAL2:.+]] syncscope("subgroup") monotonic, align 4 +// SPIRV: [[TMP3:%.*]] = atomicrmw or ptr [[PTR3:%.+]], i32 [[VAL3:.+]] syncscope("subgroup") monotonic, align 4 +// SPIRV: [[TMP4:%.*]] = atomicrmw xor ptr [[PTR4:%.+]], i32 [[VAL4:.+]] syncscope("subgroup") monotonic, align 4 +// SPIRV: [[TMP5:%.*]] = atomicrmw nand ptr [[PTR5:%.+]], i32 [[VAL5:.+]] syncscope("subgroup") monotonic, align 4 +// SPIRV: [[TMP6:%.*]] = atomicrmw min ptr [[PTR6:%.+]], i32 [[VAL6:.+]] syncscope("subgroup") monotonic, align 4 +// SPIRV: [[TMP7:%.*]] = 
atomicrmw max ptr [[PTR7:%.+]], i32 [[VAL7:.+]] syncscope("subgroup") monotonic, align 4 void fi3d(int *a, int *b, int *c, int *d, int *e, int *f, int *g, int *h) { *a = __scoped_atomic_fetch_add(a, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_WVFRNT); *b = __scoped_atomic_fetch_sub(b, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_WVFRNT); @@ -143,15 +204,24 @@ void fi3d(int *a, int *b, int *c, int *d, int *e, int *f, int *g, int *h) { *h = __scoped_atomic_fetch_max(h, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_WVFRNT); } -// CHECK-LABEL: define hidden void @fi3e( -// CHECK: [[TMP0:%.*]] = atomicrmw add ptr [[PTR0:%.+]], i32 [[VAL0:.+]] syncscope("singlethread-one-as") monotonic, align 4 -// CHECK: [[TMP1:%.*]] = atomicrmw sub ptr [[PTR1:%.+]], i32 [[VAL1:.+]] syncscope("singlethread-one-as") monotonic, align 4 -// CHECK: [[TMP2:%.*]] = atomicrmw and ptr [[PTR2:%.+]], i32 [[VAL2:.+]] syncscope("singlethread-one-as") monotonic, align 4 -// CHECK: [[TMP3:%.*]] = atomicrmw or ptr [[PTR3:%.+]], i32 [[VAL3:.+]] syncscope("singlethread-one-as") monotonic, align 4 -// CHECK: [[TMP4:%.*]] = atomicrmw xor ptr [[PTR4:%.+]], i32 [[VAL4:.+]] syncscope("singlethread-one-as") monotonic, align 4 -// CHECK: [[TMP5:%.*]] = atomicrmw nand ptr [[PTR5:%.+]], i32 [[VAL5:.+]] syncscope("singlethread-one-as") monotonic, align 4 -// CHECK: [[TMP6:%.*]] = atomicrmw min ptr [[PTR6:%.+]], i32 [[VAL6:.+]] syncscope("singlethread-one-as") monotonic, align 4 -// CHECK: [[TMP7:%.*]] = atomicrmw max ptr [[PTR7:%.+]], i32 [[VAL7:.+]] syncscope("singlethread-one-as") monotonic, align 4 +// AMDGCN-LABEL: define hidden void @fi3e( +// AMDGCN: [[TMP0:%.*]] = atomicrmw add ptr [[PTR0:%.+]], i32 [[VAL0:.+]] syncscope("singlethread-one-as") monotonic, align 4 +// AMDGCN: [[TMP1:%.*]] = atomicrmw sub ptr [[PTR1:%.+]], i32 [[VAL1:.+]] syncscope("singlethread-one-as") monotonic, align 4 +// AMDGCN: [[TMP2:%.*]] = atomicrmw and ptr [[PTR2:%.+]], i32 [[VAL2:.+]] syncscope("singlethread-one-as") monotonic, align 4 +// AMDGCN: [[TMP3:%.*]] = atomicrmw or ptr [[PTR3:%.+]], i32 [[VAL3:.+]] syncscope("singlethread-one-as") monotonic, align 4 +// AMDGCN: [[TMP4:%.*]] = atomicrmw xor ptr [[PTR4:%.+]], i32 [[VAL4:.+]] syncscope("singlethread-one-as") monotonic, align 4 +// AMDGCN: [[TMP5:%.*]] = atomicrmw nand ptr [[PTR5:%.+]], i32 [[VAL5:.+]] syncscope("singlethread-one-as") monotonic, align 4 +// AMDGCN: [[TMP6:%.*]] = atomicrmw min ptr [[PTR6:%.+]], i32 [[VAL6:.+]] syncscope("singlethread-one-as") monotonic, align 4 +// AMDGCN: [[TMP7:%.*]] = atomicrmw max ptr [[PTR7:%.+]], i32 [[VAL7:.+]] syncscope("singlethread-one-as") monotonic, align 4 +// SPIRV-LABEL: define hidden spir_func void @fi3e( +// SPIRV: [[TMP0:%.*]] = atomicrmw add ptr [[PTR0:%.+]], i32 [[VAL0:.+]] syncscope("singlethread") monotonic, align 4 +// SPIRV: [[TMP1:%.*]] = atomicrmw sub ptr [[PTR1:%.+]], i32 [[VAL1:.+]] syncscope("singlethread") monotonic, align 4 +// SPIRV: [[TMP2:%.*]] = atomicrmw and ptr [[PTR2:%.+]], i32 [[VAL2:.+]] syncscope("singlethread") monotonic, align 4 +// SPIRV: [[TMP3:%.*]] = atomicrmw or ptr [[PTR3:%.+]], i32 [[VAL3:.+]] syncscope("singlethread") monotonic, align 4 +// SPIRV: [[TMP4:%.*]] = atomicrmw xor ptr [[PTR4:%.+]], i32 [[VAL4:.+]] syncscope("singlethread") monotonic, align 4 +// SPIRV: [[TMP5:%.*]] = atomicrmw nand ptr [[PTR5:%.+]], i32 [[VAL5:.+]] syncscope("singlethread") monotonic, align 4 +// SPIRV: [[TMP6:%.*]] = atomicrmw min ptr [[PTR6:%.+]], i32 [[VAL6:.+]] syncscope("singlethread") monotonic, align 4 +// SPIRV: [[TMP7:%.*]] = atomicrmw max ptr 
[[PTR7:%.+]], i32 [[VAL7:.+]] syncscope("singlethread") monotonic, align 4 void fi3e(int *a, int *b, int *c, int *d, int *e, int *f, int *g, int *h) { *a = __scoped_atomic_fetch_add(a, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_SINGLE); *b = __scoped_atomic_fetch_sub(b, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_SINGLE); @@ -163,8 +233,10 @@ void fi3e(int *a, int *b, int *c, int *d, int *e, int *f, int *g, int *h) { *h = __scoped_atomic_fetch_max(h, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_SINGLE); } -// CHECK-LABEL: define hidden zeroext i1 @fi4a( -// CHECK: [[TMP0:%.*]] = cmpxchg ptr [[PTR0:%.+]], i32 [[VAL0:.+]], i32 [[VAL1:.+]] syncscope("one-as") acquire acquire, align 4 +// AMDGCN-LABEL: define hidden zeroext i1 @fi4a( +// AMDGCN-DAG: [[TMP0:%.*]] = cmpxchg ptr [[PTR0:%.+]], i32 [[VAL0:.+]], i32 [[VAL1:.+]] syncscope("one-as") acquire acquire, align 4 +// SPIRV-LABEL: define hidden spir_func zeroext i1 @fi4a( +// SPIRV-DAG: [[TMP0:%.*]] = cmpxchg ptr [[PTR0:%.+]], i32 [[VAL0:.+]], i32 [[VAL1:.+]] acquire acquire, align 4 _Bool fi4a(int *i) { int cmp = 0; int desired = 1; @@ -173,8 +245,10 @@ _Bool fi4a(int *i) { __MEMORY_SCOPE_SYSTEM); } -// CHECK-LABEL: define hidden zeroext i1 @fi4b( -// CHECK: [[TMP0:%.*]] = cmpxchg ptr [[PTR0:%.+]], i32 [[VAL0:.+]], i32 [[VAL1:.+]] syncscope("agent-one-as") acquire acquire, align 4 +// AMDGCN-LABEL: define hidden zeroext i1 @fi4b( +// AMDGCN-DAG: [[TMP0:%.*]] = cmpxchg ptr [[PTR0:%.+]], i32 [[VAL0:.+]], i32 [[VAL1:.+]] syncscope("agent-one-as") acquire acquire, align 4 +// SPIRV-LABEL: define hidden spir_func zeroext i1 @fi4b( +// SPIRV-DAG: [[TMP0:%.*]] = cmpxchg ptr [[PTR0:%.+]], i32 [[VAL0:.+]], i32 [[VAL1:.+]] syncscope("device") acquire acquire, align 4 _Bool fi4b(int *i) { int cmp = 0; int desired = 1; @@ -183,8 +257,10 @@ _Bool fi4b(int *i) { __MEMORY_SCOPE_DEVICE); } -// CHECK-LABEL: define hidden zeroext i1 @fi4c( -// CHECK: [[TMP0:%.*]] = cmpxchg ptr [[PTR0:%.+]], i32 [[VAL0:.+]], i32 [[VAL1:.+]] syncscope("workgroup-one-as") acquire acquire, align 4 +// AMDGCN-LABEL: define hidden zeroext i1 @fi4c( +// AMDGCN: [[TMP0:%.*]] = cmpxchg ptr [[PTR0:%.+]], i32 [[VAL0:.+]], i32 [[VAL1:.+]] syncscope("workgroup-one-as") acquire acquire, align 4 +// SPIRV-LABEL: define hidden spir_func zeroext i1 @fi4c( +// SPIRV: [[TMP0:%.*]] = cmpxchg ptr [[PTR0:%.+]], i32 [[VAL0:.+]], i32 [[VAL1:.+]] syncscope("workgroup") acquire acquire, align 4 _Bool fi4c(int *i) { int cmp = 0; int desired = 1; @@ -193,8 +269,10 @@ _Bool fi4c(int *i) { __MEMORY_SCOPE_WRKGRP); } -// CHECK-LABEL: define hidden zeroext i1 @fi4d( -// CHECK: [[TMP0:%.*]] = cmpxchg ptr [[PTR0:%.+]], i32 [[VAL0:.+]], i32 [[VAL1:.+]] syncscope("wavefront-one-as") acquire acquire, align 4 +// AMDGCN-LABEL: define hidden zeroext i1 @fi4d( +// AMDGCN: [[TMP0:%.*]] = cmpxchg ptr [[PTR0:%.+]], i32 [[VAL0:.+]], i32 [[VAL1:.+]] syncscope("wavefront-one-as") acquire acquire, align 4 +// SPIRV-LABEL: define hidden spir_func zeroext i1 @fi4d( +// SPIRV: [[TMP0:%.*]] = cmpxchg ptr [[PTR0:%.+]], i32 [[VAL0:.+]], i32 [[VAL1:.+]] syncscope("subgroup") acquire acquire, align 4 _Bool fi4d(int *i) { int cmp = 0; int desired = 1; @@ -203,8 +281,10 @@ _Bool fi4d(int *i) { __MEMORY_SCOPE_WVFRNT); } -// CHECK-LABEL: define hidden zeroext i1 @fi4e( -// CHECK: [[TMP0:%.*]] = cmpxchg ptr [[PTR0:%.+]], i32 [[VAL0:.+]], i32 [[VAL1:.+]] syncscope("singlethread-one-as") acquire acquire, align 4 +// AMDGCN-LABEL: define hidden zeroext i1 @fi4e( +// AMDGCN: [[TMP0:%.*]] = cmpxchg ptr [[PTR0:%.+]], i32 [[VAL0:.+]], i32 [[VAL1:.+]] 
syncscope("singlethread-one-as") acquire acquire, align 4 +// SPIRV-LABEL: define hidden spir_func zeroext i1 @fi4e( +// SPIRV: [[TMP0:%.*]] = cmpxchg ptr [[PTR0:%.+]], i32 [[VAL0:.+]], i32 [[VAL1:.+]] syncscope("singlethread") acquire acquire, align 4 _Bool fi4e(int *i) { int cmp = 0; int desired = 1; @@ -213,8 +293,10 @@ _Bool fi4e(int *i) { __MEMORY_SCOPE_SINGLE); } -// CHECK-LABEL: define hidden zeroext i1 @fi5a( -// CHECK: [[TMP0:%.*]] = cmpxchg weak ptr [[PTR0:%.+]], i32 [[VAL0:.+]], i32 [[VAL1:.+]] syncscope("one-as") acquire acquire, align 4 +// AMDGCN-LABEL: define hidden zeroext i1 @fi5a( +// AMDGCN: [[TMP0:%.*]] = cmpxchg weak ptr [[PTR0:%.+]], i32 [[VAL0:.+]], i32 [[VAL1:.+]] syncscope("one-as") acquire acquire, align 4 +// SPIRV-LABEL: define hidden spir_func zeroext i1 @fi5a( +// SPIRV: [[TMP0:%.*]] = cmpxchg weak ptr [[PTR0:%.+]], i32 [[VAL0:.+]], i32 [[VAL1:.+]] acquire acquire, align 4 _Bool fi5a(int *i) { int cmp = 0; return __scoped_atomic_compare_exchange_n(i, &cmp, 1, 1, __ATOMIC_ACQUIRE, @@ -222,8 +304,10 @@ _Bool fi5a(int *i) { __MEMORY_SCOPE_SYSTEM); } -// CHECK-LABEL: define hidden zeroext i1 @fi5b( -// CHECK: [[TMP0:%.*]] = cmpxchg weak ptr [[PTR0:%.+]], i32 [[VAL0:.+]], i32 [[VAL1:.+]] syncscope("agent-one-as") acquire acquire, align 4 +// AMDGCN-LABEL: define hidden zeroext i1 @fi5b( +// AMDGCN: [[TMP0:%.*]] = cmpxchg weak ptr [[PTR0:%.+]], i32 [[VAL0:.+]], i32 [[VAL1:.+]] syncscope("agent-one-as") acquire acquire, align 4 +// SPIRV-LABEL: define hidden spir_func zeroext i1 @fi5b( +// SPIRV: [[TMP0:%.*]] = cmpxchg weak ptr [[PTR0:%.+]], i32 [[VAL0:.+]], i32 [[VAL1:.+]] syncscope("device") acquire acquire, align 4 _Bool fi5b(int *i) { int cmp = 0; return __scoped_atomic_compare_exchange_n(i, &cmp, 1, 1, __ATOMIC_ACQUIRE, @@ -231,101 +315,127 @@ _Bool fi5b(int *i) { __MEMORY_SCOPE_DEVICE); } -// CHECK-LABEL: define hidden zeroext i1 @fi5c( -// CHECK: [[TMP0:%.*]] = cmpxchg weak ptr [[PTR0:%.+]], i32 [[VAL0:.+]], i32 [[VAL1:.+]] syncscope("workgroup-one-as") acquire acquire, align 4 +// AMDGCN-LABEL: define hidden zeroext i1 @fi5c( +// AMDGCN: [[TMP0:%.*]] = cmpxchg weak ptr [[PTR0:%.+]], i32 [[VAL0:.+]], i32 [[VAL1:.+]] syncscope("workgroup-one-as") acquire acquire, align 4 +// SPIRV-LABEL: define hidden spir_func zeroext i1 @fi5c( +// SPIRV: [[TMP0:%.*]] = cmpxchg weak ptr [[PTR0:%.+]], i32 [[VAL0:.+]], i32 [[VAL1:.+]] syncscope("workgroup") acquire acquire, align 4 _Bool fi5c(int *i) { int cmp = 0; return __scoped_atomic_compare_exchange_n( i, &cmp, 1, 1, __ATOMIC_ACQUIRE, __ATOMIC_ACQUIRE, __MEMORY_SCOPE_WRKGRP); } -// CHECK-LABEL: define hidden zeroext i1 @fi5d( -// CHECK: [[TMP0:%.*]] = cmpxchg weak ptr [[PTR0:%.+]], i32 [[VAL0:.+]], i32 [[VAL1:.+]] syncscope("wavefront-one-as") acquire acquire, align 4 +// AMDGCN-LABEL: define hidden zeroext i1 @fi5d( +// AMDGCN: [[TMP0:%.*]] = cmpxchg weak ptr [[PTR0:%.+]], i32 [[VAL0:.+]], i32 [[VAL1:.+]] syncscope("wavefront-one-as") acquire acquire, align 4 +// SPIRV-LABEL: define hidden spir_func zeroext i1 @fi5d( +// SPIRV: [[TMP0:%.*]] = cmpxchg weak ptr [[PTR0:%.+]], i32 [[VAL0:.+]], i32 [[VAL1:.+]] syncscope("subgroup") acquire acquire, align 4 _Bool fi5d(int *i) { int cmp = 0; return __scoped_atomic_compare_exchange_n( i, &cmp, 1, 1, __ATOMIC_ACQUIRE, __ATOMIC_ACQUIRE, __MEMORY_SCOPE_WVFRNT); } -// CHECK-LABEL: define hidden zeroext i1 @fi5e( -// CHECK: [[TMP0:%.*]] = cmpxchg weak ptr [[PTR0:%.+]], i32 [[VAL0:.+]], i32 [[VAL1:.+]] syncscope("singlethread-one-as") acquire acquire, align 4 +// AMDGCN-LABEL: define 
hidden zeroext i1 @fi5e( +// AMDGCN: [[TMP0:%.*]] = cmpxchg weak ptr [[PTR0:%.+]], i32 [[VAL0:.+]], i32 [[VAL1:.+]] syncscope("singlethread-one-as") acquire acquire, align 4 +// SPIRV-LABEL: define hidden spir_func zeroext i1 @fi5e( +// SPIRV: [[TMP0:%.*]] = cmpxchg weak ptr [[PTR0:%.+]], i32 [[VAL0:.+]], i32 [[VAL1:.+]] syncscope("singlethread") acquire acquire, align 4 _Bool fi5e(int *i) { int cmp = 0; return __scoped_atomic_compare_exchange_n( i, &cmp, 1, 1, __ATOMIC_ACQUIRE, __ATOMIC_ACQUIRE, __MEMORY_SCOPE_SINGLE); } -// CHECK-LABEL: define hidden i32 @fi6a( -// CHECK: [[TMP0:%.*]] = atomicrmw xchg ptr [[PTR0:%.+]], i32 [[VAL0:.+]] syncscope("one-as") monotonic, align 4 +// AMDGCN-LABEL: define hidden i32 @fi6a( +// AMDGCN: [[TMP0:%.*]] = atomicrmw xchg ptr [[PTR0:%.+]], i32 [[VAL0:.+]] syncscope("one-as") monotonic, align 4 +// SPIRV-LABEL: define hidden spir_func i32 @fi6a( +// SPIRV: [[TMP0:%.*]] = atomicrmw xchg ptr [[PTR0:%.+]], i32 [[VAL0:.+]] monotonic, align 4 int fi6a(int *c, int *d) { int ret; __scoped_atomic_exchange(c, d, &ret, __ATOMIC_RELAXED, __MEMORY_SCOPE_SYSTEM); return ret; } -// CHECK-LABEL: define hidden i32 @fi6b( -// CHECK: [[TMP0:%.*]] = atomicrmw xchg ptr [[PTR0:%.+]], i32 [[VAL0:.+]] syncscope("agent-one-as") monotonic, align 4 +// AMDGCN-LABEL: define hidden i32 @fi6b( +// AMDGCN: [[TMP0:%.*]] = atomicrmw xchg ptr [[PTR0:%.+]], i32 [[VAL0:.+]] syncscope("agent-one-as") monotonic, align 4 +// SPIRV-LABEL: define hidden spir_func i32 @fi6b( +// SPIRV: [[TMP0:%.*]] = atomicrmw xchg ptr [[PTR0:%.+]], i32 [[VAL0:.+]] syncscope("device") monotonic, align 4 int fi6b(int *c, int *d) { int ret; __scoped_atomic_exchange(c, d, &ret, __ATOMIC_RELAXED, __MEMORY_SCOPE_DEVICE); return ret; } -// CHECK-LABEL: define hidden i32 @fi6c( -// CHECK: [[TMP0:%.*]] = atomicrmw xchg ptr [[PTR0:%.+]], i32 [[VAL0:.+]] syncscope("workgroup-one-as") monotonic, align 4 +// AMDGCN-LABEL: define hidden i32 @fi6c( +// AMDGCN: [[TMP0:%.*]] = atomicrmw xchg ptr [[PTR0:%.+]], i32 [[VAL0:.+]] syncscope("workgroup-one-as") monotonic, align 4 +// SPIRV-LABEL: define hidden spir_func i32 @fi6c( +// SPIRV: [[TMP0:%.*]] = atomicrmw xchg ptr [[PTR0:%.+]], i32 [[VAL0:.+]] syncscope("workgroup") monotonic, align 4 int fi6c(int *c, int *d) { int ret; __scoped_atomic_exchange(c, d, &ret, __ATOMIC_RELAXED, __MEMORY_SCOPE_WRKGRP); return ret; } -// CHECK-LABEL: define hidden i32 @fi6d( -// CHECK: [[TMP0:%.*]] = atomicrmw xchg ptr [[PTR0:%.+]], i32 [[VAL0:.+]] syncscope("wavefront-one-as") monotonic, align 4 +// AMDGCN-LABEL: define hidden i32 @fi6d( +// AMDGCN: [[TMP0:%.*]] = atomicrmw xchg ptr [[PTR0:%.+]], i32 [[VAL0:.+]] syncscope("wavefront-one-as") monotonic, align 4 +// SPIRV-LABEL: define hidden spir_func i32 @fi6d( +// SPIRV: [[TMP0:%.*]] = atomicrmw xchg ptr [[PTR0:%.+]], i32 [[VAL0:.+]] syncscope("subgroup") monotonic, align 4 int fi6d(int *c, int *d) { int ret; __scoped_atomic_exchange(c, d, &ret, __ATOMIC_RELAXED, __MEMORY_SCOPE_WVFRNT); return ret; } -// CHECK-LABEL: define hidden i32 @fi6e( -// CHECK: [[TMP0:%.*]] = atomicrmw xchg ptr [[PTR0:%.+]], i32 [[VAL0:.+]] syncscope("singlethread-one-as") monotonic, align 4 +// AMDGCN-LABEL: define hidden i32 @fi6e( +// AMDGCN: [[TMP0:%.*]] = atomicrmw xchg ptr [[PTR0:%.+]], i32 [[VAL0:.+]] syncscope("singlethread-one-as") monotonic, align 4 +// SPIRV-LABEL: define hidden spir_func i32 @fi6e( +// SPIRV: [[TMP0:%.*]] = atomicrmw xchg ptr [[PTR0:%.+]], i32 [[VAL0:.+]] syncscope("singlethread") monotonic, align 4 int fi6e(int *c, int *d) { int ret; 
__scoped_atomic_exchange(c, d, &ret, __ATOMIC_RELAXED, __MEMORY_SCOPE_SINGLE); return ret; } -// CHECK-LABEL: define hidden zeroext i1 @fi7a( -// CHECK: [[TMP0:%.*]] = atomicrmw xchg ptr [[PTR0:%.+]], i8 [[VAL0:.+]] syncscope("one-as") monotonic, align 1 +// AMDGCN-LABEL: define hidden zeroext i1 @fi7a( +// AMDGCN: [[TMP0:%.*]] = atomicrmw xchg ptr [[PTR0:%.+]], i8 [[VAL0:.+]] syncscope("one-as") monotonic, align 1 +// SPIRV-LABEL: define hidden spir_func zeroext i1 @fi7a( +// SPIRV: [[TMP0:%.*]] = atomicrmw xchg ptr [[PTR0:%.+]], i8 [[VAL0:.+]] monotonic, align 1 _Bool fi7a(_Bool *c) { return __scoped_atomic_exchange_n(c, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_SYSTEM); } -// CHECK-LABEL: define hidden zeroext i1 @fi7b( -// CHECK: [[TMP0:%.*]] = atomicrmw xchg ptr [[PTR0:%.+]], i8 [[VAL0:.+]] syncscope("agent-one-as") monotonic, align 1 +// AMDGCN-LABEL: define hidden zeroext i1 @fi7b( +// AMDGCN: [[TMP0:%.*]] = atomicrmw xchg ptr [[PTR0:%.+]], i8 [[VAL0:.+]] syncscope("agent-one-as") monotonic, align 1 +// SPIRV-LABEL: define hidden spir_func zeroext i1 @fi7b( +// SPIRV: [[TMP0:%.*]] = atomicrmw xchg ptr [[PTR0:%.+]], i8 [[VAL0:.+]] syncscope("device") monotonic, align 1 _Bool fi7b(_Bool *c) { return __scoped_atomic_exchange_n(c, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_DEVICE); } -// CHECK-LABEL: define hidden zeroext i1 @fi7c( -// CHECK: [[TMP0:%.*]] = atomicrmw xchg ptr [[PTR0:%.+]], i8 [[VAL0:.+]] syncscope("workgroup-one-as") monotonic, align 1 +// AMDGCN-LABEL: define hidden zeroext i1 @fi7c( +// AMDGCN: [[TMP0:%.*]] = atomicrmw xchg ptr [[PTR0:%.+]], i8 [[VAL0:.+]] syncscope("workgroup-one-as") monotonic, align 1 +// SPIRV-LABEL: define hidden spir_func zeroext i1 @fi7c( +// SPIRV: [[TMP0:%.*]] = atomicrmw xchg ptr [[PTR0:%.+]], i8 [[VAL0:.+]] syncscope("workgroup") monotonic, align 1 _Bool fi7c(_Bool *c) { return __scoped_atomic_exchange_n(c, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_WRKGRP); } -// CHECK-LABEL: define hidden zeroext i1 @fi7d( -// CHECK: [[TMP0:%.*]] = atomicrmw xchg ptr [[PTR0:%.+]], i8 [[VAL0:.+]] syncscope("wavefront-one-as") monotonic, align 1 +// AMDGCN-LABEL: define hidden zeroext i1 @fi7d( +// AMDGCN: [[TMP0:%.*]] = atomicrmw xchg ptr [[PTR0:%.+]], i8 [[VAL0:.+]] syncscope("wavefront-one-as") monotonic, align 1 +// SPIRV-LABEL: define hidden spir_func zeroext i1 @fi7d( +// SPIRV: [[TMP0:%.*]] = atomicrmw xchg ptr [[PTR0:%.+]], i8 [[VAL0:.+]] syncscope("subgroup") monotonic, align 1 _Bool fi7d(_Bool *c) { return __scoped_atomic_exchange_n(c, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_WVFRNT); } -// CHECK-LABEL: define hidden zeroext i1 @fi7e( -// CHECK: [[TMP0:%.*]] = atomicrmw xchg ptr [[PTR0:%.+]], i8 [[VAL0:.+]] syncscope("singlethread-one-as") monotonic, align 1 +// AMDGCN-LABEL: define hidden zeroext i1 @fi7e( +// AMDGCN: [[TMP0:%.*]] = atomicrmw xchg ptr [[PTR0:%.+]], i8 [[VAL0:.+]] syncscope("singlethread-one-as") monotonic, align 1 +// SPIRV-LABEL: define hidden spir_func zeroext i1 @fi7e( +// SPIRV: [[TMP0:%.*]] = atomicrmw xchg ptr [[PTR0:%.+]], i8 [[VAL0:.+]] syncscope("singlethread") monotonic, align 1 _Bool fi7e(_Bool *c) { - return __scoped_atomic_exchange_n(c, 1, __ATOMIC_RELAXED, + return __scoped_atomic_exchange_n(c, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_SINGLE); } diff --git a/clang/test/CodeGenOpenCL/atomic-builtins-default-to-device-scope.cl b/clang/test/CodeGenOpenCL/atomic-builtins-default-to-device-scope.cl new file mode 100644 index 0000000000000..5af2d807b4189 --- /dev/null +++ b/clang/test/CodeGenOpenCL/atomic-builtins-default-to-device-scope.cl @@ -0,0 
+1,235 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5 +// RUN: %clang_cc1 %s -cl-std=CL2.0 -emit-llvm -O3 -o - -triple=amdgcn-amd-amdhsa \ +// RUN: | FileCheck %s --check-prefix=AMDGCN +// RUN: %clang_cc1 %s -cl-std=CL2.0 -emit-llvm -O3 -o - -triple=spirv64-unknown-unknown \ +// RUN: | FileCheck %s --check-prefix=SPIRV + +// AMDGCN-LABEL: define dso_local i32 @load( +// AMDGCN-SAME: ptr nocapture noundef readonly [[P:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +// AMDGCN-NEXT: [[ENTRY:.*:]] +// AMDGCN-NEXT: [[TMP0:%.*]] = load atomic i32, ptr [[P]] syncscope("agent") seq_cst, align 4 +// AMDGCN-NEXT: ret i32 [[TMP0]] +// +// SPIRV-LABEL: define spir_func i32 @load( +// SPIRV-SAME: ptr addrspace(4) nocapture noundef readonly [[P:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +// SPIRV-NEXT: [[ENTRY:.*:]] +// SPIRV-NEXT: [[TMP0:%.*]] = load atomic i32, ptr addrspace(4) [[P]] syncscope("device") seq_cst, align 4 +// SPIRV-NEXT: ret i32 [[TMP0]] +// +int load(int *p) { return __atomic_load_n(p, __ATOMIC_SEQ_CST); } +// AMDGCN-LABEL: define dso_local void @store( +// AMDGCN-SAME: ptr nocapture noundef writeonly [[P:%.*]], i32 noundef [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +// AMDGCN-NEXT: [[ENTRY:.*:]] +// AMDGCN-NEXT: store atomic i32 [[X]], ptr [[P]] syncscope("agent") seq_cst, align 4 +// AMDGCN-NEXT: ret void +// +// SPIRV-LABEL: define spir_func void @store( +// SPIRV-SAME: ptr addrspace(4) nocapture noundef writeonly [[P:%.*]], i32 noundef [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +// SPIRV-NEXT: [[ENTRY:.*:]] +// SPIRV-NEXT: store atomic i32 [[X]], ptr addrspace(4) [[P]] syncscope("device") seq_cst, align 4 +// SPIRV-NEXT: ret void +// +void store(int *p, int x) { return __atomic_store_n(p, x, __ATOMIC_SEQ_CST); } +// AMDGCN-LABEL: define dso_local i32 @add( +// AMDGCN-SAME: ptr nocapture noundef [[P:%.*]], i32 noundef [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +// AMDGCN-NEXT: [[ENTRY:.*:]] +// AMDGCN-NEXT: [[TMP0:%.*]] = atomicrmw add ptr [[P]], i32 [[X]] syncscope("agent") seq_cst, align 4 +// AMDGCN-NEXT: ret i32 [[TMP0]] +// +// SPIRV-LABEL: define spir_func i32 @add( +// SPIRV-SAME: ptr addrspace(4) nocapture noundef [[P:%.*]], i32 noundef [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +// SPIRV-NEXT: [[ENTRY:.*:]] +// SPIRV-NEXT: [[TMP0:%.*]] = atomicrmw add ptr addrspace(4) [[P]], i32 [[X]] syncscope("device") seq_cst, align 4 +// SPIRV-NEXT: ret i32 [[TMP0]] +// +int add(int *p, int x) { return __atomic_fetch_add(p, x, __ATOMIC_SEQ_CST); } +// AMDGCN-LABEL: define dso_local float @fadd( +// AMDGCN-SAME: ptr nocapture noundef [[P:%.*]], float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +// AMDGCN-NEXT: [[ENTRY:.*:]] +// AMDGCN-NEXT: [[TMP0:%.*]] = atomicrmw fadd ptr [[P]], float [[X]] syncscope("agent") seq_cst, align 4 +// AMDGCN-NEXT: ret float [[TMP0]] +// +// SPIRV-LABEL: define spir_func float @fadd( +// SPIRV-SAME: ptr addrspace(4) nocapture noundef [[P:%.*]], float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +// SPIRV-NEXT: [[ENTRY:.*:]] +// SPIRV-NEXT: [[TMP0:%.*]] = atomicrmw fadd ptr addrspace(4) [[P]], float [[X]] syncscope("device") seq_cst, align 4 +// SPIRV-NEXT: ret float [[TMP0]] +// +float fadd(float *p, float x) { return __atomic_fetch_add(p, x, __ATOMIC_SEQ_CST); } +// AMDGCN-LABEL: define dso_local i32 @sub( +// AMDGCN-SAME: ptr nocapture noundef [[P:%.*]], i32 noundef [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +// AMDGCN-NEXT: [[ENTRY:.*:]] +// AMDGCN-NEXT: [[TMP0:%.*]] = atomicrmw sub 
ptr [[P]], i32 [[X]] syncscope("agent") seq_cst, align 4 +// AMDGCN-NEXT: ret i32 [[TMP0]] +// +// SPIRV-LABEL: define spir_func i32 @sub( +// SPIRV-SAME: ptr addrspace(4) nocapture noundef [[P:%.*]], i32 noundef [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +// SPIRV-NEXT: [[ENTRY:.*:]] +// SPIRV-NEXT: [[TMP0:%.*]] = atomicrmw sub ptr addrspace(4) [[P]], i32 [[X]] syncscope("device") seq_cst, align 4 +// SPIRV-NEXT: ret i32 [[TMP0]] +// +int sub(int *p, int x) { return __atomic_fetch_sub(p, x, __ATOMIC_SEQ_CST); } +// AMDGCN-LABEL: define dso_local float @fsub( +// AMDGCN-SAME: ptr nocapture noundef [[P:%.*]], float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +// AMDGCN-NEXT: [[ENTRY:.*:]] +// AMDGCN-NEXT: [[TMP0:%.*]] = atomicrmw fsub ptr [[P]], float [[X]] syncscope("agent") seq_cst, align 4 +// AMDGCN-NEXT: ret float [[TMP0]] +// +// SPIRV-LABEL: define spir_func float @fsub( +// SPIRV-SAME: ptr addrspace(4) nocapture noundef [[P:%.*]], float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +// SPIRV-NEXT: [[ENTRY:.*:]] +// SPIRV-NEXT: [[TMP0:%.*]] = atomicrmw fsub ptr addrspace(4) [[P]], float [[X]] syncscope("device") seq_cst, align 4 +// SPIRV-NEXT: ret float [[TMP0]] +// +float fsub(float *p, float x) { return __atomic_fetch_sub(p, x, __ATOMIC_SEQ_CST); } +// AMDGCN-LABEL: define dso_local i32 @and( +// AMDGCN-SAME: ptr nocapture noundef [[P:%.*]], i32 noundef [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +// AMDGCN-NEXT: [[ENTRY:.*:]] +// AMDGCN-NEXT: [[TMP0:%.*]] = atomicrmw and ptr [[P]], i32 [[X]] syncscope("agent") seq_cst, align 4 +// AMDGCN-NEXT: ret i32 [[TMP0]] +// +// SPIRV-LABEL: define spir_func i32 @and( +// SPIRV-SAME: ptr addrspace(4) nocapture noundef [[P:%.*]], i32 noundef [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +// SPIRV-NEXT: [[ENTRY:.*:]] +// SPIRV-NEXT: [[TMP0:%.*]] = atomicrmw and ptr addrspace(4) [[P]], i32 [[X]] syncscope("device") seq_cst, align 4 +// SPIRV-NEXT: ret i32 [[TMP0]] +// +int and(int *p, int x) { return __atomic_fetch_and(p, x, __ATOMIC_SEQ_CST); } +// AMDGCN-LABEL: define dso_local i32 @nand( +// AMDGCN-SAME: ptr nocapture noundef [[P:%.*]], i32 noundef [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +// AMDGCN-NEXT: [[ENTRY:.*:]] +// AMDGCN-NEXT: [[TMP0:%.*]] = atomicrmw nand ptr [[P]], i32 [[X]] syncscope("agent") seq_cst, align 4 +// AMDGCN-NEXT: ret i32 [[TMP0]] +// +// SPIRV-LABEL: define spir_func i32 @nand( +// SPIRV-SAME: ptr addrspace(4) nocapture noundef [[P:%.*]], i32 noundef [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +// SPIRV-NEXT: [[ENTRY:.*:]] +// SPIRV-NEXT: [[TMP0:%.*]] = atomicrmw nand ptr addrspace(4) [[P]], i32 [[X]] syncscope("device") seq_cst, align 4 +// SPIRV-NEXT: ret i32 [[TMP0]] +// +int nand(int *p, int x) { return __atomic_fetch_nand(p, x, __ATOMIC_SEQ_CST); } +// AMDGCN-LABEL: define dso_local i32 @or( +// AMDGCN-SAME: ptr nocapture noundef [[P:%.*]], i32 noundef [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +// AMDGCN-NEXT: [[ENTRY:.*:]] +// AMDGCN-NEXT: [[TMP0:%.*]] = atomicrmw or ptr [[P]], i32 [[X]] syncscope("agent") seq_cst, align 4 +// AMDGCN-NEXT: ret i32 [[TMP0]] +// +// SPIRV-LABEL: define spir_func i32 @or( +// SPIRV-SAME: ptr addrspace(4) nocapture noundef [[P:%.*]], i32 noundef [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +// SPIRV-NEXT: [[ENTRY:.*:]] +// SPIRV-NEXT: [[TMP0:%.*]] = atomicrmw or ptr addrspace(4) [[P]], i32 [[X]] syncscope("device") seq_cst, align 4 +// SPIRV-NEXT: ret i32 [[TMP0]] +// +int or(int *p, int x) { return __atomic_fetch_or(p, x, __ATOMIC_SEQ_CST); } +// AMDGCN-LABEL: define 
dso_local i32 @xor( +// AMDGCN-SAME: ptr nocapture noundef [[P:%.*]], i32 noundef [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +// AMDGCN-NEXT: [[ENTRY:.*:]] +// AMDGCN-NEXT: [[TMP0:%.*]] = atomicrmw xor ptr [[P]], i32 [[X]] syncscope("agent") seq_cst, align 4 +// AMDGCN-NEXT: ret i32 [[TMP0]] +// +// SPIRV-LABEL: define spir_func i32 @xor( +// SPIRV-SAME: ptr addrspace(4) nocapture noundef [[P:%.*]], i32 noundef [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +// SPIRV-NEXT: [[ENTRY:.*:]] +// SPIRV-NEXT: [[TMP0:%.*]] = atomicrmw xor ptr addrspace(4) [[P]], i32 [[X]] syncscope("device") seq_cst, align 4 +// SPIRV-NEXT: ret i32 [[TMP0]] +// +int xor(int *p, int x) { return __atomic_fetch_xor(p, x, __ATOMIC_SEQ_CST); } +// AMDGCN-LABEL: define dso_local i32 @min( +// AMDGCN-SAME: ptr nocapture noundef [[P:%.*]], i32 noundef [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +// AMDGCN-NEXT: [[ENTRY:.*:]] +// AMDGCN-NEXT: [[TMP0:%.*]] = atomicrmw min ptr [[P]], i32 [[X]] syncscope("agent") seq_cst, align 4 +// AMDGCN-NEXT: ret i32 [[TMP0]] +// +// SPIRV-LABEL: define spir_func i32 @min( +// SPIRV-SAME: ptr addrspace(4) nocapture noundef [[P:%.*]], i32 noundef [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +// SPIRV-NEXT: [[ENTRY:.*:]] +// SPIRV-NEXT: [[TMP0:%.*]] = atomicrmw min ptr addrspace(4) [[P]], i32 [[X]] syncscope("device") seq_cst, align 4 +// SPIRV-NEXT: ret i32 [[TMP0]] +// +int min(int *p, int x) { return __atomic_fetch_min(p, x, __ATOMIC_SEQ_CST); } +// AMDGCN-LABEL: define dso_local float @fmin( +// AMDGCN-SAME: ptr nocapture noundef [[P:%.*]], float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +// AMDGCN-NEXT: [[ENTRY:.*:]] +// AMDGCN-NEXT: [[TMP0:%.*]] = atomicrmw fmin ptr [[P]], float [[X]] syncscope("agent") seq_cst, align 4 +// AMDGCN-NEXT: ret float [[TMP0]] +// +// SPIRV-LABEL: define spir_func float @fmin( +// SPIRV-SAME: ptr addrspace(4) nocapture noundef [[P:%.*]], float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +// SPIRV-NEXT: [[ENTRY:.*:]] +// SPIRV-NEXT: [[TMP0:%.*]] = atomicrmw fmin ptr addrspace(4) [[P]], float [[X]] syncscope("device") seq_cst, align 4 +// SPIRV-NEXT: ret float [[TMP0]] +// +float fmin(float *p, float x) { return __atomic_fetch_min(p, x, __ATOMIC_SEQ_CST); } +// AMDGCN-LABEL: define dso_local i32 @max( +// AMDGCN-SAME: ptr nocapture noundef [[P:%.*]], i32 noundef [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +// AMDGCN-NEXT: [[ENTRY:.*:]] +// AMDGCN-NEXT: [[TMP0:%.*]] = atomicrmw max ptr [[P]], i32 [[X]] syncscope("agent") seq_cst, align 4 +// AMDGCN-NEXT: ret i32 [[TMP0]] +// +// SPIRV-LABEL: define spir_func i32 @max( +// SPIRV-SAME: ptr addrspace(4) nocapture noundef [[P:%.*]], i32 noundef [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +// SPIRV-NEXT: [[ENTRY:.*:]] +// SPIRV-NEXT: [[TMP0:%.*]] = atomicrmw max ptr addrspace(4) [[P]], i32 [[X]] syncscope("device") seq_cst, align 4 +// SPIRV-NEXT: ret i32 [[TMP0]] +// +int max(int *p, int x) { return __atomic_fetch_max(p, x, __ATOMIC_SEQ_CST); } +// AMDGCN-LABEL: define dso_local float @fmax( +// AMDGCN-SAME: ptr nocapture noundef [[P:%.*]], float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +// AMDGCN-NEXT: [[ENTRY:.*:]] +// AMDGCN-NEXT: [[TMP0:%.*]] = atomicrmw fmax ptr [[P]], float [[X]] syncscope("agent") seq_cst, align 4 +// AMDGCN-NEXT: ret float [[TMP0]] +// +// SPIRV-LABEL: define spir_func float @fmax( +// SPIRV-SAME: ptr addrspace(4) nocapture noundef [[P:%.*]], float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +// SPIRV-NEXT: [[ENTRY:.*:]] +// SPIRV-NEXT: [[TMP0:%.*]] = atomicrmw 
fmax ptr addrspace(4) [[P]], float [[X]] syncscope("device") seq_cst, align 4 +// SPIRV-NEXT: ret float [[TMP0]] +// +float fmax(float *p, float x) { return __atomic_fetch_max(p, x, __ATOMIC_SEQ_CST); } +// AMDGCN-LABEL: define dso_local i32 @xchg( +// AMDGCN-SAME: ptr nocapture noundef [[P:%.*]], i32 noundef [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +// AMDGCN-NEXT: [[ENTRY:.*:]] +// AMDGCN-NEXT: [[TMP0:%.*]] = atomicrmw xchg ptr [[P]], i32 [[X]] syncscope("agent") seq_cst, align 4 +// AMDGCN-NEXT: ret i32 [[TMP0]] +// +// SPIRV-LABEL: define spir_func i32 @xchg( +// SPIRV-SAME: ptr addrspace(4) nocapture noundef [[P:%.*]], i32 noundef [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +// SPIRV-NEXT: [[ENTRY:.*:]] +// SPIRV-NEXT: [[TMP0:%.*]] = atomicrmw xchg ptr addrspace(4) [[P]], i32 [[X]] syncscope("device") seq_cst, align 4 +// SPIRV-NEXT: ret i32 [[TMP0]] +// +int xchg(int *p, int x) { return __atomic_exchange_n(p, x, __ATOMIC_SEQ_CST); } +// AMDGCN-LABEL: define dso_local range(i32 0, 2) i32 @cmpxchg( +// AMDGCN-SAME: ptr nocapture noundef [[P:%.*]], i32 noundef [[X:%.*]], i32 noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] { +// AMDGCN-NEXT: [[ENTRY:.*:]] +// AMDGCN-NEXT: [[TMP0:%.*]] = cmpxchg ptr [[P]], i32 [[X]], i32 [[Y]] syncscope("agent") seq_cst seq_cst, align 4 +// AMDGCN-NEXT: [[TMP1:%.*]] = extractvalue { i32, i1 } [[TMP0]], 1 +// AMDGCN-NEXT: [[CONV:%.*]] = zext i1 [[TMP1]] to i32 +// AMDGCN-NEXT: ret i32 [[CONV]] +// +// SPIRV-LABEL: define spir_func range(i32 0, 2) i32 @cmpxchg( +// SPIRV-SAME: ptr addrspace(4) nocapture noundef [[P:%.*]], i32 noundef [[X:%.*]], i32 noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] { +// SPIRV-NEXT: [[ENTRY:.*:]] +// SPIRV-NEXT: [[TMP0:%.*]] = cmpxchg ptr addrspace(4) [[P]], i32 [[X]], i32 [[Y]] syncscope("device") seq_cst seq_cst, align 4 +// SPIRV-NEXT: [[TMP1:%.*]] = extractvalue { i32, i1 } [[TMP0]], 1 +// SPIRV-NEXT: [[CONV:%.*]] = zext i1 [[TMP1]] to i32 +// SPIRV-NEXT: ret i32 [[CONV]] +// +int cmpxchg(int *p, int x, int y) { return __atomic_compare_exchange(p, &x, &y, 0, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST); } +// AMDGCN-LABEL: define dso_local range(i32 0, 2) i32 @cmpxchg_weak( +// AMDGCN-SAME: ptr nocapture noundef [[P:%.*]], i32 noundef [[X:%.*]], i32 noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] { +// AMDGCN-NEXT: [[ENTRY:.*:]] +// AMDGCN-NEXT: [[TMP0:%.*]] = cmpxchg weak ptr [[P]], i32 [[X]], i32 [[Y]] syncscope("agent") seq_cst seq_cst, align 4 +// AMDGCN-NEXT: [[TMP1:%.*]] = extractvalue { i32, i1 } [[TMP0]], 1 +// AMDGCN-NEXT: [[CONV:%.*]] = zext i1 [[TMP1]] to i32 +// AMDGCN-NEXT: ret i32 [[CONV]] +// +// SPIRV-LABEL: define spir_func range(i32 0, 2) i32 @cmpxchg_weak( +// SPIRV-SAME: ptr addrspace(4) nocapture noundef [[P:%.*]], i32 noundef [[X:%.*]], i32 noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] { +// SPIRV-NEXT: [[ENTRY:.*:]] +// SPIRV-NEXT: [[TMP0:%.*]] = cmpxchg weak ptr addrspace(4) [[P]], i32 [[X]], i32 [[Y]] syncscope("device") seq_cst seq_cst, align 4 +// SPIRV-NEXT: [[TMP1:%.*]] = extractvalue { i32, i1 } [[TMP0]], 1 +// SPIRV-NEXT: [[CONV:%.*]] = zext i1 [[TMP1]] to i32 +// SPIRV-NEXT: ret i32 [[CONV]] +// +int cmpxchg_weak(int *p, int x, int y) { return __atomic_compare_exchange(p, &x, &y, 1, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST); } diff --git a/clang/test/Sema/scoped-atomic-ops.c b/clang/test/Sema/scoped-atomic-ops.c index 59e638c646664..33044aa256cb0 100644 --- a/clang/test/Sema/scoped-atomic-ops.c +++ b/clang/test/Sema/scoped-atomic-ops.c @@ -1,5 +1,6 @@ // RUN: %clang_cc1 -x c 
-triple=amdgcn-amd-amdhsa -verify -fsyntax-only %s
 // RUN: %clang_cc1 -x c -triple=x86_64-pc-linux-gnu -verify -fsyntax-only %s
+// RUN: %clang_cc1 -x c -triple=spirv64-unknown-unknown -verify -fsyntax-only %s
 
 int fi1a(int *i) {
   int v;
diff --git a/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp b/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp
index 795ddf47c40da..86be79cbb5e7f 100644
--- a/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp
@@ -1351,7 +1351,8 @@ Instruction *SPIRVEmitIntrinsics::visitAtomicCmpXchgInst(AtomicCmpXchgInst &I) {
   SmallVector<Value *> Args;
   for (auto &Op : I.operands())
     Args.push_back(Op);
-  Args.push_back(B.getInt32(I.getSyncScopeID()));
+  Args.push_back(B.getInt32(
+      static_cast<uint32_t>(getMemScope(I.getContext(), I.getSyncScopeID()))));
   Args.push_back(B.getInt32(
       static_cast<uint32_t>(getMemSemantics(I.getSuccessOrdering()))));
   Args.push_back(B.getInt32(
diff --git a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
index 7af92b87ce00c..e475810f92f71 100644
--- a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
@@ -33,27 +33,6 @@
 #include "llvm/IR/IntrinsicsSPIRV.h"
 #include "llvm/Support/Debug.h"
 
-namespace {
-
-struct SyncScopeIDs {
-  llvm::SyncScope::ID Work_ItemSSID;
-  llvm::SyncScope::ID WorkGroupSSID;
-  llvm::SyncScope::ID DeviceSSID;
-  llvm::SyncScope::ID AllSVMDevicesSSID;
-  llvm::SyncScope::ID SubGroupSSID;
-
-  SyncScopeIDs() {}
-  SyncScopeIDs(llvm::LLVMContext &Context) {
-    Work_ItemSSID = Context.getOrInsertSyncScopeID("work_item");
-    WorkGroupSSID = Context.getOrInsertSyncScopeID("workgroup");
-    DeviceSSID = Context.getOrInsertSyncScopeID("device");
-    AllSVMDevicesSSID = Context.getOrInsertSyncScopeID("all_svm_devices");
-    SubGroupSSID = Context.getOrInsertSyncScopeID("sub_group");
-  }
-};
-
-} // namespace
-
 #define DEBUG_TYPE "spirv-isel"
 
 using namespace llvm;
@@ -76,7 +55,6 @@ class SPIRVInstructionSelector : public InstructionSelector {
   const RegisterBankInfo &RBI;
   SPIRVGlobalRegistry &GR;
   MachineRegisterInfo *MRI;
-  SyncScopeIDs SSIDs;
   MachineFunction *HasVRegsReset = nullptr;
 
   /// We need to keep track of the number we give to anonymous global values to
@@ -305,7 +283,6 @@ void SPIRVInstructionSelector::setupMF(MachineFunction &MF, GISelKnownBits *KB,
                                        CodeGenCoverage *CoverageInfo,
                                        ProfileSummaryInfo *PSI,
                                        BlockFrequencyInfo *BFI) {
-  SSIDs = SyncScopeIDs(MF.getFunction().getContext());
   MRI = &MF.getRegInfo();
   GR.setCurrentFunc(MF);
   InstructionSelector::setupMF(MF, KB, CoverageInfo, PSI, BFI);
@@ -845,29 +822,6 @@ bool SPIRVInstructionSelector::selectBitcast(Register ResVReg,
   return selectUnOp(ResVReg, ResType, I, SPIRV::OpBitcast);
 }
 
-static SPIRV::Scope::Scope getScope(SyncScope::ID Ord,
-                                    const SyncScopeIDs &SSIDs) {
-  if (Ord == SyncScope::SingleThread || Ord == SSIDs.Work_ItemSSID)
-    return SPIRV::Scope::Invocation;
-  else if (Ord == SyncScope::System || Ord == SSIDs.DeviceSSID)
-    return SPIRV::Scope::Device;
-  else if (Ord == SSIDs.WorkGroupSSID)
-    return SPIRV::Scope::Workgroup;
-  else if (Ord == SSIDs.AllSVMDevicesSSID)
-    return SPIRV::Scope::CrossDevice;
-  else if (Ord == SSIDs.SubGroupSSID)
-    return SPIRV::Scope::Subgroup;
-  else
-    // OpenCL approach is: "The functions that do not have memory_scope argument
-    // have the same semantics as the corresponding functions with the
-    // memory_scope argument set to memory_scope_device." See ref.:
-    // https://registry.khronos.org/OpenCL/specs/3.0-unified/html/OpenCL_C.html#atomic-functions
-    // In our case if the scope is unknown, assuming that SPIR-V code is to be
-    // consumed in an OpenCL environment, we use the same approach and set the
-    // scope to memory_scope_device.
-    return SPIRV::Scope::Device;
-}
-
 static void addMemoryOperands(MachineMemOperand *MemOp,
                               MachineInstrBuilder &MIB) {
   uint32_t SpvMemOp = static_cast<uint32_t>(SPIRV::MemoryOperand::None);
@@ -1020,8 +974,8 @@ bool SPIRVInstructionSelector::selectAtomicRMW(Register ResVReg,
                                                unsigned NegateOpcode) const {
   assert(I.hasOneMemOperand());
   const MachineMemOperand *MemOp = *I.memoperands_begin();
-  uint32_t Scope =
-      static_cast<uint32_t>(getScope(MemOp->getSyncScopeID(), SSIDs));
+  uint32_t Scope = static_cast<uint32_t>(getMemScope(
+      GR.CurMF->getFunction().getContext(), MemOp->getSyncScopeID()));
   Register ScopeReg = buildI32Constant(Scope, I);
 
   Register Ptr = I.getOperand(1).getReg();
@@ -1092,7 +1046,8 @@ bool SPIRVInstructionSelector::selectFence(MachineInstr &I) const {
   uint32_t MemSem = static_cast<uint32_t>(getMemSemantics(AO));
   Register MemSemReg = buildI32Constant(MemSem, I);
   SyncScope::ID Ord = SyncScope::ID(I.getOperand(1).getImm());
-  uint32_t Scope = static_cast<uint32_t>(getScope(Ord, SSIDs));
+  uint32_t Scope = static_cast<uint32_t>(
+      getMemScope(GR.CurMF->getFunction().getContext(), Ord));
   Register ScopeReg = buildI32Constant(Scope, I);
   MachineBasicBlock &BB = *I.getParent();
   return BuildMI(BB, I, I.getDebugLoc(), TII.get(SPIRV::OpMemoryBarrier))
@@ -1111,8 +1066,8 @@ bool SPIRVInstructionSelector::selectAtomicCmpXchg(Register ResVReg,
   if (!isa<GIntrinsic>(I)) {
     assert(I.hasOneMemOperand());
     const MachineMemOperand *MemOp = *I.memoperands_begin();
-    unsigned Scope =
-        static_cast<unsigned>(getScope(MemOp->getSyncScopeID(), SSIDs));
+    unsigned Scope = static_cast<unsigned>(getMemScope(
+        GR.CurMF->getFunction().getContext(), MemOp->getSyncScopeID()));
     ScopeReg = buildI32Constant(Scope, I);
 
     unsigned ScSem = static_cast<uint32_t>(
diff --git a/llvm/lib/Target/SPIRV/SPIRVUtils.cpp b/llvm/lib/Target/SPIRV/SPIRVUtils.cpp
index 53601e402c737..aec144f6f0586 100644
--- a/llvm/lib/Target/SPIRV/SPIRVUtils.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVUtils.cpp
@@ -253,6 +253,33 @@ SPIRV::MemorySemantics::MemorySemantics getMemSemantics(AtomicOrdering Ord) {
   llvm_unreachable(nullptr);
 }
 
+SPIRV::Scope::Scope getMemScope(LLVMContext &Ctx, SyncScope::ID Id) {
+  static const struct {
+    // Named by
+    // https://registry.khronos.org/SPIR-V/specs/unified1/SPIRV.html#_scope_id.
+    // We don't need aliases for Invocation and CrossDevice, as we already have
+    // them covered by "singlethread" and "" strings respectively (see
+    // implementation of LLVMContext::LLVMContext()).
+    llvm::SyncScope::ID SubGroup;
+    llvm::SyncScope::ID WorkGroup;
+    llvm::SyncScope::ID Device;
+  } SSIDs{Ctx.getOrInsertSyncScopeID("subgroup"),
+          Ctx.getOrInsertSyncScopeID("workgroup"),
+          Ctx.getOrInsertSyncScopeID("device")};
+
+  if (Id == llvm::SyncScope::SingleThread)
+    return SPIRV::Scope::Invocation;
+  else if (Id == llvm::SyncScope::System)
+    return SPIRV::Scope::CrossDevice;
+  else if (Id == SSIDs.SubGroup)
+    return SPIRV::Scope::Subgroup;
+  else if (Id == SSIDs.WorkGroup)
+    return SPIRV::Scope::Workgroup;
+  else if (Id == SSIDs.Device)
+    return SPIRV::Scope::Device;
+  return SPIRV::Scope::CrossDevice;
+}
+
 MachineInstr *getDefInstrMaybeConstant(Register &ConstReg,
                                        const MachineRegisterInfo *MRI) {
   MachineInstr *MI = MRI->getVRegDef(ConstReg);
diff --git a/llvm/lib/Target/SPIRV/SPIRVUtils.h b/llvm/lib/Target/SPIRV/SPIRVUtils.h
index 93d64a7f435e9..7c7616000d22b 100644
--- a/llvm/lib/Target/SPIRV/SPIRVUtils.h
+++ b/llvm/lib/Target/SPIRV/SPIRVUtils.h
@@ -145,6 +145,8 @@ getMemSemanticsForStorageClass(SPIRV::StorageClass::StorageClass SC);
 
 SPIRV::MemorySemantics::MemorySemantics getMemSemantics(AtomicOrdering Ord);
 
+SPIRV::Scope::Scope getMemScope(LLVMContext &Ctx, SyncScope::ID Id);
+
 // Find def instruction for the given ConstReg, walking through
 // spv_track_constant and ASSIGN_TYPE instructions. Updates ConstReg by def
 // of OpConstant instruction.
diff --git a/llvm/test/CodeGen/SPIRV/AtomicCompareExchange.ll b/llvm/test/CodeGen/SPIRV/AtomicCompareExchange.ll
index 323afec7f35f8..f8207c56a5656 100644
--- a/llvm/test/CodeGen/SPIRV/AtomicCompareExchange.ll
+++ b/llvm/test/CodeGen/SPIRV/AtomicCompareExchange.ll
@@ -1,7 +1,7 @@
 ; RUN: llc -O0 -mtriple=spirv32-unknown-unknown %s -o - | FileCheck %s --check-prefix=CHECK-SPIRV
 
 ; CHECK-SPIRV: %[[#Int:]] = OpTypeInt 32 0
-; CHECK-SPIRV-DAG: %[[#MemScope_Device:]] = OpConstant %[[#Int]] 1
+; CHECK-SPIRV-DAG: %[[#MemScope_CrossDevice:]] = OpConstant %[[#Int]] 0
 ; CHECK-SPIRV-DAG: %[[#MemSemEqual_SeqCst:]] = OpConstant %[[#Int]] 16
 ; CHECK-SPIRV-DAG: %[[#MemSemUnequal_Acquire:]] = OpConstant %[[#Int]] 2
 ; CHECK-SPIRV-DAG: %[[#Constant_456:]] = OpConstant %[[#Int]] 456
@@ -11,7 +11,7 @@
 ; CHECK-SPIRV-DAG: %[[#UndefStruct:]] = OpUndef %[[#Struct]]
 
 ; CHECK-SPIRV: %[[#Value:]] = OpLoad %[[#Int]] %[[#Value_ptr:]]
-; CHECK-SPIRV: %[[#Res:]] = OpAtomicCompareExchange %[[#Int]] %[[#Pointer:]] %[[#MemScope_Device]]
+; CHECK-SPIRV: %[[#Res:]] = OpAtomicCompareExchange %[[#Int]] %[[#Pointer:]] %[[#MemScope_CrossDevice]]
 ; CHECK-SPIRV-SAME: %[[#MemSemEqual_SeqCst]] %[[#MemSemUnequal_Acquire]] %[[#Value]] %[[#Comparator:]]
 ; CHECK-SPIRV: %[[#Success:]] = OpIEqual %[[#]] %[[#Res]] %[[#Comparator]]
 ; CHECK-SPIRV: %[[#Composite_0:]] = OpCompositeInsert %[[#Struct]] %[[#Res]] %[[#UndefStruct]] 0
@@ -34,7 +34,7 @@ cmpxchg.continue:                                 ; preds = %cmpxchg.store_expec
   ret void
 }
 
-; CHECK-SPIRV: %[[#Res_1:]] = OpAtomicCompareExchange %[[#Int]] %[[#Ptr:]] %[[#MemScope_Device]]
+; CHECK-SPIRV: %[[#Res_1:]] = OpAtomicCompareExchange %[[#Int]] %[[#Ptr:]] %[[#MemScope_CrossDevice]]
 ; CHECK-SPIRV-SAME: %[[#MemSemEqual_SeqCst]] %[[#MemSemUnequal_Acquire]] %[[#Constant_456]] %[[#Constant_128]]
 ; CHECK-SPIRV: %[[#Success_1:]] = OpIEqual %[[#]] %[[#Res_1]] %[[#Constant_128]]
 ; CHECK-SPIRV: %[[#Composite:]] = OpCompositeInsert %[[#Struct]] %[[#Res_1]] %[[#UndefStruct]] 0
diff --git a/llvm/test/CodeGen/SPIRV/atomicrmw.ll b/llvm/test/CodeGen/SPIRV/atomicrmw.ll
index 5f95a974ba671..07576056117cb 100644
--- a/llvm/test/CodeGen/SPIRV/atomicrmw.ll
+++ 
b/llvm/test/CodeGen/SPIRV/atomicrmw.ll @@ -5,8 +5,7 @@ ; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv32-unknown-unknown %s -o - -filetype=obj | spirv-val %} ; CHECK: %[[#Int:]] = OpTypeInt 32 0 -; CHECK-DAG: %[[#Scope_Device:]] = OpConstant %[[#Int]] 1{{$}} -; CHECK-DAG: %[[#MemSem_Relaxed:]] = OpConstant %[[#Int]] 0 +; CHECK-DAG: %[[#Scope_CrossDevice:]] = OpConstant %[[#Int]] 0{{$}} ; CHECK-DAG: %[[#MemSem_Acquire:]] = OpConstant %[[#Int]] 2 ; CHECK-DAG: %[[#MemSem_Release:]] = OpConstant %[[#Int]] 4{{$}} ; CHECK-DAG: %[[#MemSem_AcquireRelease:]] = OpConstant %[[#Int]] 8 @@ -25,37 +24,37 @@ define dso_local spir_func void @test_atomicrmw() local_unnamed_addr { entry: %0 = atomicrmw xchg i32 addrspace(1)* @ui, i32 42 acq_rel -; CHECK: %[[#]] = OpAtomicExchange %[[#Int]] %[[#Pointer]] %[[#Scope_Device]] %[[#MemSem_AcquireRelease]] %[[#Value]] +; CHECK: %[[#]] = OpAtomicExchange %[[#Int]] %[[#Pointer]] %[[#Scope_CrossDevice]] %[[#MemSem_AcquireRelease]] %[[#Value]] %1 = atomicrmw xchg float addrspace(1)* @f, float 42.000000e+00 seq_cst -; CHECK: %[[#]] = OpAtomicExchange %[[#Float]] %[[#FPPointer]] %[[#Scope_Device]] %[[#MemSem_SequentiallyConsistent]] %[[#FPValue]] +; CHECK: %[[#]] = OpAtomicExchange %[[#Float]] %[[#FPPointer]] %[[#Scope_CrossDevice]] %[[#MemSem_SequentiallyConsistent]] %[[#FPValue]] %2 = atomicrmw add i32 addrspace(1)* @ui, i32 42 monotonic -; CHECK: %[[#]] = OpAtomicIAdd %[[#Int]] %[[#Pointer]] %[[#Scope_Device]] %[[#MemSem_Relaxed]] %[[#Value]] +; CHECK: %[[#]] = OpAtomicIAdd %[[#Int]] %[[#Pointer]] %[[#Scope_CrossDevice]] %{{.+}} %[[#Value]] %3 = atomicrmw sub i32 addrspace(1)* @ui, i32 42 acquire -; CHECK: %[[#]] = OpAtomicISub %[[#Int]] %[[#Pointer]] %[[#Scope_Device]] %[[#MemSem_Acquire]] %[[#Value]] +; CHECK: %[[#]] = OpAtomicISub %[[#Int]] %[[#Pointer]] %[[#Scope_CrossDevice]] %[[#MemSem_Acquire]] %[[#Value]] %4 = atomicrmw or i32 addrspace(1)* @ui, i32 42 release -; CHECK: %[[#]] = OpAtomicOr %[[#Int]] %[[#Pointer]] %[[#Scope_Device]] %[[#MemSem_Release]] %[[#Value]] +; CHECK: %[[#]] = OpAtomicOr %[[#Int]] %[[#Pointer]] %[[#Scope_CrossDevice]] %[[#MemSem_Release]] %[[#Value]] %5 = atomicrmw xor i32 addrspace(1)* @ui, i32 42 acq_rel -; CHECK: %[[#]] = OpAtomicXor %[[#Int]] %[[#Pointer]] %[[#Scope_Device]] %[[#MemSem_AcquireRelease]] %[[#Value]] +; CHECK: %[[#]] = OpAtomicXor %[[#Int]] %[[#Pointer]] %[[#Scope_CrossDevice]] %[[#MemSem_AcquireRelease]] %[[#Value]] %6 = atomicrmw and i32 addrspace(1)* @ui, i32 42 seq_cst -; CHECK: %[[#]] = OpAtomicAnd %[[#Int]] %[[#Pointer]] %[[#Scope_Device]] %[[#MemSem_SequentiallyConsistent]] %[[#Value]] +; CHECK: %[[#]] = OpAtomicAnd %[[#Int]] %[[#Pointer]] %[[#Scope_CrossDevice]] %[[#MemSem_SequentiallyConsistent]] %[[#Value]] %7 = atomicrmw max i32 addrspace(1)* @ui, i32 42 monotonic -; CHECK: %[[#]] = OpAtomicSMax %[[#Int]] %[[#Pointer]] %[[#Scope_Device]] %[[#MemSem_Relaxed]] %[[#Value]] +; CHECK: %[[#]] = OpAtomicSMax %[[#Int]] %[[#Pointer]] %[[#Scope_CrossDevice]] %{{.*}} %[[#Value]] %8 = atomicrmw min i32 addrspace(1)* @ui, i32 42 acquire -; CHECK: %[[#]] = OpAtomicSMin %[[#Int]] %[[#Pointer]] %[[#Scope_Device]] %[[#MemSem_Acquire]] %[[#Value]] +; CHECK: %[[#]] = OpAtomicSMin %[[#Int]] %[[#Pointer]] %[[#Scope_CrossDevice]] %[[#MemSem_Acquire]] %[[#Value]] %9 = atomicrmw umax i32 addrspace(1)* @ui, i32 42 release -; CHECK: %[[#]] = OpAtomicUMax %[[#Int]] %[[#Pointer]] %[[#Scope_Device]] %[[#MemSem_Release]] %[[#Value]] +; CHECK: %[[#]] = OpAtomicUMax %[[#Int]] %[[#Pointer]] %[[#Scope_CrossDevice]] %[[#MemSem_Release]] 
%[[#Value]] %10 = atomicrmw umin i32 addrspace(1)* @ui, i32 42 acq_rel -; CHECK: %[[#]] = OpAtomicUMin %[[#Int]] %[[#Pointer]] %[[#Scope_Device]] %[[#MemSem_AcquireRelease]] %[[#Value]] +; CHECK: %[[#]] = OpAtomicUMin %[[#Int]] %[[#Pointer]] %[[#Scope_CrossDevice]] %[[#MemSem_AcquireRelease]] %[[#Value]] ret void } diff --git a/llvm/test/CodeGen/SPIRV/extensions/SPV_EXT_shader_atomic_float_add/atomicrmw_faddfsub_double.ll b/llvm/test/CodeGen/SPIRV/extensions/SPV_EXT_shader_atomic_float_add/atomicrmw_faddfsub_double.ll index 14035a68c81aa..c2ed2f8f62fc8 100644 --- a/llvm/test/CodeGen/SPIRV/extensions/SPV_EXT_shader_atomic_float_add/atomicrmw_faddfsub_double.ll +++ b/llvm/test/CodeGen/SPIRV/extensions/SPV_EXT_shader_atomic_float_add/atomicrmw_faddfsub_double.ll @@ -10,13 +10,14 @@ ; CHECK-DAG: %[[TyInt32:[0-9]+]] = OpTypeInt 32 0 ; CHECK-DAG: %[[Const0:[0-9]+]] = OpConstant %[[TyFP64]] 0 ; CHECK-DAG: %[[Const42:[0-9]+]] = OpConstant %[[TyFP64]] 42 -; CHECK-DAG: %[[ScopeDevice:[0-9]+]] = OpConstant %[[TyInt32]] 1 +; CHECK-DAG: %[[ScopeAllSvmDevices:[0-9]+]] = OpConstantNull %[[TyInt32]] ; CHECK-DAG: %[[MemSeqCst:[0-9]+]] = OpConstant %[[TyInt32]] 16 +; CHECK-DAG: %[[ScopeDevice:[0-9]+]] = OpConstant %[[TyInt32]] 1 ; CHECK-DAG: %[[TyFP64Ptr:[0-9]+]] = OpTypePointer {{[a-zA-Z]+}} %[[TyFP64]] ; CHECK-DAG: %[[DblPtr:[0-9]+]] = OpVariable %[[TyFP64Ptr]] {{[a-zA-Z]+}} %[[Const0]] -; CHECK: OpAtomicFAddEXT %[[TyFP64]] %[[DblPtr]] %[[ScopeDevice]] %[[MemSeqCst]] %[[Const42]] +; CHECK: OpAtomicFAddEXT %[[TyFP64]] %[[DblPtr]] %[[ScopeAllSvmDevices]] %[[MemSeqCst]] %[[Const42]] ; CHECK: %[[Const42Neg:[0-9]+]] = OpFNegate %[[TyFP64]] %[[Const42]] -; CHECK: OpAtomicFAddEXT %[[TyFP64]] %[[DblPtr]] %[[ScopeDevice]] %[[MemSeqCst]] %[[Const42Neg]] +; CHECK: OpAtomicFAddEXT %[[TyFP64]] %[[DblPtr]] %[[ScopeAllSvmDevices]] %[[MemSeqCst]] %[[Const42Neg]] ; CHECK: OpAtomicFAddEXT %[[TyFP64]] %[[DblPtr]] %[[ScopeDevice]] %[[MemSeqCst]] %[[Const42]] target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024" diff --git a/llvm/test/CodeGen/SPIRV/extensions/SPV_EXT_shader_atomic_float_add/atomicrmw_faddfsub_float.ll b/llvm/test/CodeGen/SPIRV/extensions/SPV_EXT_shader_atomic_float_add/atomicrmw_faddfsub_float.ll index d34811496e5a1..075e63ea6de61 100644 --- a/llvm/test/CodeGen/SPIRV/extensions/SPV_EXT_shader_atomic_float_add/atomicrmw_faddfsub_float.ll +++ b/llvm/test/CodeGen/SPIRV/extensions/SPV_EXT_shader_atomic_float_add/atomicrmw_faddfsub_float.ll @@ -10,15 +10,16 @@ ; CHECK-DAG: %[[TyInt32:[0-9]+]] = OpTypeInt 32 0 ; CHECK-DAG: %[[Const0:[0-9]+]] = OpConstant %[[TyFP32]] 0 ; CHECK-DAG: %[[Const42:[0-9]+]] = OpConstant %[[TyFP32]] 42 +; CHECK-DAG: %[[ScopeAllSvmDevices:[0-9]+]] = OpConstantNull %[[TyInt32]] +; CHECK-DAG: %[[MemSeqCst:[0-9]+]] = OpConstant %[[TyInt32]] 16 ; CHECK-DAG: %[[ScopeDevice:[0-9]+]] = OpConstant %[[TyInt32]] 1 ; CHECK-DAG: %[[ScopeWorkgroup:[0-9]+]] = OpConstant %[[TyInt32]] 2 -; CHECK-DAG: %[[MemSeqCst:[0-9]+]] = OpConstant %[[TyInt32]] 16 ; CHECK-DAG: %[[WorkgroupMemory:[0-9]+]] = OpConstant %[[TyInt32]] 512 ; CHECK-DAG: %[[TyFP32Ptr:[0-9]+]] = OpTypePointer {{[a-zA-Z]+}} %[[TyFP32]] ; CHECK-DAG: %[[DblPtr:[0-9]+]] = OpVariable %[[TyFP32Ptr]] {{[a-zA-Z]+}} %[[Const0]] -; CHECK: OpAtomicFAddEXT %[[TyFP32]] %[[DblPtr]] %[[ScopeDevice]] %[[MemSeqCst]] %[[Const42]] +; CHECK: OpAtomicFAddEXT %[[TyFP32]] %[[DblPtr]] %[[ScopeAllSvmDevices]] %[[MemSeqCst]] %[[Const42]] ; CHECK: %[[Const42Neg:[0-9]+]] = OpFNegate %[[TyFP32]] %[[Const42]] -; CHECK: 
OpAtomicFAddEXT %[[TyFP32]] %[[DblPtr]] %[[ScopeDevice]] %[[MemSeqCst]] %[[Const42Neg]] +; CHECK: OpAtomicFAddEXT %[[TyFP32]] %[[DblPtr]] %[[ScopeAllSvmDevices]] %[[MemSeqCst]] %[[Const42Neg]] ; CHECK: OpAtomicFAddEXT %[[TyFP32]] %[[DblPtr]] %[[ScopeDevice]] %[[MemSeqCst]] %[[Const42]] ; CHECK: OpAtomicFAddEXT %[[TyFP32]] %[[DblPtr]] %[[ScopeWorkgroup]] %[[WorkgroupMemory]] %[[Const42]] ; CHECK: %[[Neg42:[0-9]+]] = OpFNegate %[[TyFP32]] %[[Const42]] diff --git a/llvm/test/CodeGen/SPIRV/extensions/SPV_EXT_shader_atomic_float_add/atomicrmw_faddfsub_half.ll b/llvm/test/CodeGen/SPIRV/extensions/SPV_EXT_shader_atomic_float_add/atomicrmw_faddfsub_half.ll index 7da99411ae530..2c938409846d3 100644 --- a/llvm/test/CodeGen/SPIRV/extensions/SPV_EXT_shader_atomic_float_add/atomicrmw_faddfsub_half.ll +++ b/llvm/test/CodeGen/SPIRV/extensions/SPV_EXT_shader_atomic_float_add/atomicrmw_faddfsub_half.ll @@ -13,13 +13,14 @@ ; CHECK-DAG: %[[TyInt32:[0-9]+]] = OpTypeInt 32 0 ; CHECK-DAG: %[[Const0:[0-9]+]] = OpConstant %[[TyFP16]] 0 ; CHECK-DAG: %[[Const42:[0-9]+]] = OpConstant %[[TyFP16]] 20800 -; CHECK-DAG: %[[ScopeDevice:[0-9]+]] = OpConstant %[[TyInt32]] 1 +; CHECK-DAG: %[[ScopeAllSvmDevices:[0-9]+]] = OpConstantNull %[[TyInt32]] ; CHECK-DAG: %[[MemSeqCst:[0-9]+]] = OpConstant %[[TyInt32]] 16 +; CHECK-DAG: %[[ScopeDevice:[0-9]+]] = OpConstant %[[TyInt32]] 1 ; CHECK-DAG: %[[TyFP16Ptr:[0-9]+]] = OpTypePointer {{[a-zA-Z]+}} %[[TyFP16]] ; CHECK-DAG: %[[DblPtr:[0-9]+]] = OpVariable %[[TyFP16Ptr]] {{[a-zA-Z]+}} %[[Const0]] -; CHECK: OpAtomicFAddEXT %[[TyFP16]] %[[DblPtr]] %[[ScopeDevice]] %[[MemSeqCst]] %[[Const42]] +; CHECK: OpAtomicFAddEXT %[[TyFP16]] %[[DblPtr]] %[[ScopeAllSvmDevices]] %[[MemSeqCst]] %[[Const42]] ; CHECK: %[[Const42Neg:[0-9]+]] = OpFNegate %[[TyFP16]] %[[Const42]] -; CHECK: OpAtomicFAddEXT %[[TyFP16]] %[[DblPtr]] %[[ScopeDevice]] %[[MemSeqCst]] %[[Const42Neg]] +; CHECK: OpAtomicFAddEXT %[[TyFP16]] %[[DblPtr]] %[[ScopeAllSvmDevices]] %[[MemSeqCst]] %[[Const42Neg]] ; CHECK: OpAtomicFAddEXT %[[TyFP16]] %[[DblPtr]] %[[ScopeDevice]] %[[MemSeqCst]] %[[Const42]] target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024" diff --git a/llvm/test/CodeGen/SPIRV/extensions/SPV_EXT_shader_atomic_float_min_max/atomicrmw_fminfmax_double.ll b/llvm/test/CodeGen/SPIRV/extensions/SPV_EXT_shader_atomic_float_min_max/atomicrmw_fminfmax_double.ll index a2d0a594c861d..fdc05f4eac06b 100644 --- a/llvm/test/CodeGen/SPIRV/extensions/SPV_EXT_shader_atomic_float_min_max/atomicrmw_fminfmax_double.ll +++ b/llvm/test/CodeGen/SPIRV/extensions/SPV_EXT_shader_atomic_float_min_max/atomicrmw_fminfmax_double.ll @@ -10,12 +10,13 @@ ; CHECK-DAG: %[[TyInt32:[0-9]+]] = OpTypeInt 32 0 ; CHECK-DAG: %[[Const0:[0-9]+]] = OpConstant %[[TyFP64]] 0 ; CHECK-DAG: %[[Const42:[0-9]+]] = OpConstant %[[TyFP64]] 42 -; CHECK-DAG: %[[ScopeDevice:[0-9]+]] = OpConstant %[[TyInt32]] 1 +; CHECK-DAG: %[[ScopeAllSvmDevices:[0-9]+]] = OpConstantNull %[[TyInt32]] ; CHECK-DAG: %[[MemSeqCst:[0-9]+]] = OpConstant %[[TyInt32]] 16 +; CHECK-DAG: %[[ScopeDevice:[0-9]+]] = OpConstant %[[TyInt32]] 1 ; CHECK-DAG: %[[TyFP64Ptr:[0-9]+]] = OpTypePointer {{[a-zA-Z]+}} %[[TyFP64]] ; CHECK-DAG: %[[DblPtr:[0-9]+]] = OpVariable %[[TyFP64Ptr]] {{[a-zA-Z]+}} %[[Const0]] -; CHECK: OpAtomicFMinEXT %[[TyFP64]] %[[DblPtr]] %[[ScopeDevice]] %[[MemSeqCst]] %[[Const42]] -; CHECK: OpAtomicFMaxEXT %[[TyFP64]] %[[DblPtr]] %[[ScopeDevice]] %[[MemSeqCst]] %[[Const42]] +; CHECK: OpAtomicFMinEXT %[[TyFP64]] %[[DblPtr]] %[[ScopeAllSvmDevices]] 
%[[MemSeqCst]] %[[Const42]] +; CHECK: OpAtomicFMaxEXT %[[TyFP64]] %[[DblPtr]] %[[ScopeAllSvmDevices]] %[[MemSeqCst]] %[[Const42]] ; CHECK: OpAtomicFMinEXT %[[TyFP64]] %[[DblPtr]] %[[ScopeDevice]] %[[MemSeqCst]] %[[Const42]] ; CHECK: OpAtomicFMaxEXT %[[TyFP64]] %[[DblPtr]] %[[ScopeDevice]] %[[MemSeqCst]] %[[Const42]] diff --git a/llvm/test/CodeGen/SPIRV/extensions/SPV_EXT_shader_atomic_float_min_max/atomicrmw_fminfmax_float.ll b/llvm/test/CodeGen/SPIRV/extensions/SPV_EXT_shader_atomic_float_min_max/atomicrmw_fminfmax_float.ll index 896b7acc1c87b..a7ff448a98b98 100644 --- a/llvm/test/CodeGen/SPIRV/extensions/SPV_EXT_shader_atomic_float_min_max/atomicrmw_fminfmax_float.ll +++ b/llvm/test/CodeGen/SPIRV/extensions/SPV_EXT_shader_atomic_float_min_max/atomicrmw_fminfmax_float.ll @@ -10,12 +10,13 @@ ; CHECK-DAG: %[[TyInt32:[0-9]+]] = OpTypeInt 32 0 ; CHECK-DAG: %[[Const0:[0-9]+]] = OpConstant %[[TyFP32]] 0 ; CHECK-DAG: %[[Const42:[0-9]+]] = OpConstant %[[TyFP32]] 42 -; CHECK-DAG: %[[ScopeDevice:[0-9]+]] = OpConstant %[[TyInt32]] 1 +; CHECK-DAG: %[[ScopeAllSvmDevices:[0-9]+]] = OpConstantNull %[[TyInt32]] ; CHECK-DAG: %[[MemSeqCst:[0-9]+]] = OpConstant %[[TyInt32]] 16 +; CHECK-DAG: %[[ScopeDevice:[0-9]+]] = OpConstant %[[TyInt32]] 1 ; CHECK-DAG: %[[TyFP32Ptr:[0-9]+]] = OpTypePointer {{[a-zA-Z]+}} %[[TyFP32]] ; CHECK-DAG: %[[DblPtr:[0-9]+]] = OpVariable %[[TyFP32Ptr]] {{[a-zA-Z]+}} %[[Const0]] -; CHECK: OpAtomicFMinEXT %[[TyFP32]] %[[DblPtr]] %[[ScopeDevice]] %[[MemSeqCst]] %[[Const42]] -; CHECK: OpAtomicFMaxEXT %[[TyFP32]] %[[DblPtr]] %[[ScopeDevice]] %[[MemSeqCst]] %[[Const42]] +; CHECK: OpAtomicFMinEXT %[[TyFP32]] %[[DblPtr]] %[[ScopeAllSvmDevices]] %[[MemSeqCst]] %[[Const42]] +; CHECK: OpAtomicFMaxEXT %[[TyFP32]] %[[DblPtr]] %[[ScopeAllSvmDevices]] %[[MemSeqCst]] %[[Const42]] ; CHECK: OpAtomicFMinEXT %[[TyFP32]] %[[DblPtr]] %[[ScopeDevice]] %[[MemSeqCst]] %[[Const42]] ; CHECK: OpAtomicFMaxEXT %[[TyFP32]] %[[DblPtr]] %[[ScopeDevice]] %[[MemSeqCst]] %[[Const42]] diff --git a/llvm/test/CodeGen/SPIRV/extensions/SPV_EXT_shader_atomic_float_min_max/atomicrmw_fminfmax_half.ll b/llvm/test/CodeGen/SPIRV/extensions/SPV_EXT_shader_atomic_float_min_max/atomicrmw_fminfmax_half.ll index b3f48711707a1..d5576d1911a8b 100644 --- a/llvm/test/CodeGen/SPIRV/extensions/SPV_EXT_shader_atomic_float_min_max/atomicrmw_fminfmax_half.ll +++ b/llvm/test/CodeGen/SPIRV/extensions/SPV_EXT_shader_atomic_float_min_max/atomicrmw_fminfmax_half.ll @@ -10,12 +10,13 @@ ; CHECK-DAG: %[[TyInt32:[0-9]+]] = OpTypeInt 32 0 ; CHECK-DAG: %[[Const0:[0-9]+]] = OpConstant %[[TyFP16]] 0 ; CHECK-DAG: %[[ConstHalf:[0-9]+]] = OpConstant %[[TyFP16]] 20800 -; CHECK-DAG: %[[ScopeDevice:[0-9]+]] = OpConstant %[[TyInt32]] 1 +; CHECK-DAG: %[[ScopeAllSvmDevices:[0-9]+]] = OpConstantNull %[[TyInt32]] ; CHECK-DAG: %[[MemSeqCst:[0-9]+]] = OpConstant %[[TyInt32]] 16 +; CHECK-DAG: %[[ScopeDevice:[0-9]+]] = OpConstant %[[TyInt32]] 1 ; CHECK-DAG: %[[TyFP16Ptr:[0-9]+]] = OpTypePointer {{[a-zA-Z]+}} %[[TyFP16]] ; CHECK-DAG: %[[DblPtr:[0-9]+]] = OpVariable %[[TyFP16Ptr]] {{[a-zA-Z]+}} %[[Const0]] -; CHECK: OpAtomicFMinEXT %[[TyFP16]] %[[DblPtr]] %[[ScopeDevice]] %[[MemSeqCst]] %[[ConstHalf]] -; CHECK: OpAtomicFMaxEXT %[[TyFP16]] %[[DblPtr]] %[[ScopeDevice]] %[[MemSeqCst]] %[[ConstHalf]] +; CHECK: OpAtomicFMinEXT %[[TyFP16]] %[[DblPtr]] %[[ScopeAllSvmDevices]] %[[MemSeqCst]] %[[ConstHalf]] +; CHECK: OpAtomicFMaxEXT %[[TyFP16]] %[[DblPtr]] %[[ScopeAllSvmDevices]] %[[MemSeqCst]] %[[ConstHalf]] ; CHECK: OpAtomicFMinEXT %[[TyFP16]] %[[DblPtr]] %[[ScopeDevice]] 
%[[MemSeqCst]] %[[ConstHalf]] ; CHECK: OpAtomicFMaxEXT %[[TyFP16]] %[[DblPtr]] %[[ScopeDevice]] %[[MemSeqCst]] %[[ConstHalf]] diff --git a/llvm/test/CodeGen/SPIRV/fence.ll b/llvm/test/CodeGen/SPIRV/fence.ll index 5da58667f24f2..c7496c15f2c95 100644 --- a/llvm/test/CodeGen/SPIRV/fence.ll +++ b/llvm/test/CodeGen/SPIRV/fence.ll @@ -3,16 +3,16 @@ ; CHECK-DAG: OpName %[[#GetScope:]] "_Z8getScopev" ; CHECK-DAG: %[[#Long:]] = OpTypeInt 32 0 -; CHECK-DAG: %[[#ScopeDevice:]] = OpConstant %[[#Long]] 1 ; CHECK-DAG: %[[#WrkGrpConst2:]] = OpConstant %[[#Long]] 2 -; CHECK-DAG: %[[#Const3:]] = OpConstant %[[#Long]] 3 +; CHECK-DAG: %[[#ScopeAllSvmDevices:]] = OpConstantNull %[[#Long]] ; CHECK-DAG: %[[#InvocationConst4:]] = OpConstant %[[#Long]] 4 ; CHECK-DAG: %[[#Const8:]] = OpConstant %[[#Long]] 8 ; CHECK-DAG: %[[#Const16:]] = OpConstant %[[#Long]] 16 +; CHECK-DAG: %[[#Const3:]] = OpConstant %[[#Long]] 3 ; CHECK-DAG: %[[#Const912:]] = OpConstant %[[#Long]] 912 -; CHECK: OpMemoryBarrier %[[#ScopeDevice]] %[[#WrkGrpConst2]] -; CHECK: OpMemoryBarrier %[[#ScopeDevice]] %[[#InvocationConst4]] -; CHECK: OpMemoryBarrier %[[#ScopeDevice]] %[[#Const8]] +; CHECK: OpMemoryBarrier %[[#ScopeAllSvmDevices]] %[[#WrkGrpConst2]] +; CHECK: OpMemoryBarrier %[[#ScopeAllSvmDevices]] %[[#InvocationConst4]] +; CHECK: OpMemoryBarrier %[[#ScopeAllSvmDevices]] %[[#Const8]] ; CHECK: OpMemoryBarrier %[[#InvocationConst4]] %[[#Const16]] ; CHECK: OpMemoryBarrier %[[#WrkGrpConst2]] %[[#InvocationConst4]] ; CHECK: OpFunctionEnd diff --git a/llvm/test/CodeGen/SPIRV/instructions/atomic-ptr.ll b/llvm/test/CodeGen/SPIRV/instructions/atomic-ptr.ll index 9469d24b20af2..54d0843cbf234 100644 --- a/llvm/test/CodeGen/SPIRV/instructions/atomic-ptr.ll +++ b/llvm/test/CodeGen/SPIRV/instructions/atomic-ptr.ll @@ -9,7 +9,7 @@ ; CHECK-DAG: %[[#LongTy:]] = OpTypeInt 64 0 ; CHECK-DAG: %[[#PtrLongTy:]] = OpTypePointer CrossWorkgroup %[[#LongTy]] ; CHECK-DAG: %[[#IntTy:]] = OpTypeInt 32 0 -; CHECK-DAG: %[[#Scope:]] = OpConstant %[[#IntTy]] 1 +; CHECK-DAG: %[[#Scope:]] = OpConstantNull %[[#IntTy]] ; CHECK-DAG: %[[#MemSem:]] = OpConstant %[[#IntTy]] 8 ; CHECK-DAG: %[[#PtrPtrLongTy:]] = OpTypePointer CrossWorkgroup %[[#PtrLongTy]] diff --git a/llvm/test/CodeGen/SPIRV/instructions/atomic.ll b/llvm/test/CodeGen/SPIRV/instructions/atomic.ll index 8c5c036351d97..f4e7b128f77a3 100644 --- a/llvm/test/CodeGen/SPIRV/instructions/atomic.ll +++ b/llvm/test/CodeGen/SPIRV/instructions/atomic.ll @@ -18,16 +18,15 @@ ; CHECK-DAG: [[PtrI32Ty:%.*]] = OpTypePointer Function [[I32Ty]] ; CHECK-DAG: [[I64Ty:%.*]] = OpTypeInt 64 0 ; CHECK-DAG: [[PtrI64Ty:%.*]] = OpTypePointer Generic [[I64Ty]] -;; Device scope is encoded with constant 1 -; CHECK-DAG: [[SCOPE:%.*]] = OpConstant [[I32Ty]] 1 +; CHECK-DAG: [[CROSSDEVICESCOPE:%.*]] = OpConstantNull [[I32Ty]] +; CHECK-DAG: [[DEVICESCOPE:%.*]] = OpConstant [[I32Ty]] 1 ;; "monotonic" maps to the relaxed memory semantics, encoded with constant 0 -; CHECK-DAG: [[RELAXED:%.*]] = OpConstantNull [[I32Ty]] ; CHECK: [[ADD]] = OpFunction [[I32Ty]] ; CHECK-NEXT: [[A:%.*]] = OpFunctionParameter [[PtrI32Ty]] ; CHECK-NEXT: [[B:%.*]] = OpFunctionParameter [[I32Ty]] ; CHECK-NEXT: OpLabel -; CHECK-NEXT: [[R:%.*]] = OpAtomicIAdd [[I32Ty]] [[A]] [[SCOPE]] [[RELAXED]] [[B]] +; CHECK-NEXT: [[R:%.*]] = OpAtomicIAdd [[I32Ty]] [[A]] [[CROSSDEVICESCOPE]] {{.+}} [[B]] ; CHECK-NEXT: OpReturnValue [[R]] ; CHECK-NEXT: OpFunctionEnd define i32 @test_add(i32* %ptr, i32 %val) { @@ -39,7 +38,7 @@ define i32 @test_add(i32* %ptr, i32 %val) { ; CHECK-NEXT: [[A:%.*]] = 
OpFunctionParameter [[PtrI32Ty]] ; CHECK-NEXT: [[B:%.*]] = OpFunctionParameter [[I32Ty]] ; CHECK-NEXT: OpLabel -; CHECK-NEXT: [[R:%.*]] = OpAtomicISub [[I32Ty]] [[A]] [[SCOPE]] [[RELAXED]] [[B]] +; CHECK-NEXT: [[R:%.*]] = OpAtomicISub [[I32Ty]] [[A]] [[CROSSDEVICESCOPE]] {{.+}} [[B]] ; CHECK-NEXT: OpReturnValue [[R]] ; CHECK-NEXT: OpFunctionEnd define i32 @test_sub(i32* %ptr, i32 %val) { @@ -51,7 +50,7 @@ define i32 @test_sub(i32* %ptr, i32 %val) { ; CHECK-NEXT: [[A:%.*]] = OpFunctionParameter [[PtrI32Ty]] ; CHECK-NEXT: [[B:%.*]] = OpFunctionParameter [[I32Ty]] ; CHECK-NEXT: OpLabel -; CHECK-NEXT: [[R:%.*]] = OpAtomicSMin [[I32Ty]] [[A]] [[SCOPE]] [[RELAXED]] [[B]] +; CHECK-NEXT: [[R:%.*]] = OpAtomicSMin [[I32Ty]] [[A]] [[CROSSDEVICESCOPE]] {{.+}} [[B]] ; CHECK-NEXT: OpReturnValue [[R]] ; CHECK-NEXT: OpFunctionEnd define i32 @test_min(i32* %ptr, i32 %val) { @@ -63,7 +62,7 @@ define i32 @test_min(i32* %ptr, i32 %val) { ; CHECK-NEXT: [[A:%.*]] = OpFunctionParameter [[PtrI32Ty]] ; CHECK-NEXT: [[B:%.*]] = OpFunctionParameter [[I32Ty]] ; CHECK-NEXT: OpLabel -; CHECK-NEXT: [[R:%.*]] = OpAtomicSMax [[I32Ty]] [[A]] [[SCOPE]] [[RELAXED]] [[B]] +; CHECK-NEXT: [[R:%.*]] = OpAtomicSMax [[I32Ty]] [[A]] [[CROSSDEVICESCOPE]] {{.+}} [[B]] ; CHECK-NEXT: OpReturnValue [[R]] ; CHECK-NEXT: OpFunctionEnd define i32 @test_max(i32* %ptr, i32 %val) { @@ -75,7 +74,7 @@ define i32 @test_max(i32* %ptr, i32 %val) { ; CHECK-NEXT: [[A:%.*]] = OpFunctionParameter [[PtrI32Ty]] ; CHECK-NEXT: [[B:%.*]] = OpFunctionParameter [[I32Ty]] ; CHECK-NEXT: OpLabel -; CHECK-NEXT: [[R:%.*]] = OpAtomicUMin [[I32Ty]] [[A]] [[SCOPE]] [[RELAXED]] [[B]] +; CHECK-NEXT: [[R:%.*]] = OpAtomicUMin [[I32Ty]] [[A]] [[CROSSDEVICESCOPE]] {{.+}} [[B]] ; CHECK-NEXT: OpReturnValue [[R]] ; CHECK-NEXT: OpFunctionEnd define i32 @test_umin(i32* %ptr, i32 %val) { @@ -87,7 +86,7 @@ define i32 @test_umin(i32* %ptr, i32 %val) { ; CHECK-NEXT: [[A:%.*]] = OpFunctionParameter [[PtrI32Ty]] ; CHECK-NEXT: [[B:%.*]] = OpFunctionParameter [[I32Ty]] ; CHECK-NEXT: OpLabel -; CHECK-NEXT: [[R:%.*]] = OpAtomicUMax [[I32Ty]] [[A]] [[SCOPE]] [[RELAXED]] [[B]] +; CHECK-NEXT: [[R:%.*]] = OpAtomicUMax [[I32Ty]] [[A]] [[CROSSDEVICESCOPE]] {{.+}} [[B]] ; CHECK-NEXT: OpReturnValue [[R]] ; CHECK-NEXT: OpFunctionEnd define i32 @test_umax(i32* %ptr, i32 %val) { @@ -99,7 +98,7 @@ define i32 @test_umax(i32* %ptr, i32 %val) { ; CHECK-NEXT: [[A:%.*]] = OpFunctionParameter [[PtrI32Ty]] ; CHECK-NEXT: [[B:%.*]] = OpFunctionParameter [[I32Ty]] ; CHECK-NEXT: OpLabel -; CHECK-NEXT: [[R:%.*]] = OpAtomicAnd [[I32Ty]] [[A]] [[SCOPE]] [[RELAXED]] [[B]] +; CHECK-NEXT: [[R:%.*]] = OpAtomicAnd [[I32Ty]] [[A]] [[CROSSDEVICESCOPE]] {{.+}} [[B]] ; CHECK-NEXT: OpReturnValue [[R]] ; CHECK-NEXT: OpFunctionEnd define i32 @test_and(i32* %ptr, i32 %val) { @@ -111,7 +110,7 @@ define i32 @test_and(i32* %ptr, i32 %val) { ; CHECK-NEXT: [[A:%.*]] = OpFunctionParameter [[PtrI32Ty]] ; CHECK-NEXT: [[B:%.*]] = OpFunctionParameter [[I32Ty]] ; CHECK-NEXT: OpLabel -; CHECK-NEXT: [[R:%.*]] = OpAtomicOr [[I32Ty]] [[A]] [[SCOPE]] [[RELAXED]] [[B]] +; CHECK-NEXT: [[R:%.*]] = OpAtomicOr [[I32Ty]] [[A]] [[CROSSDEVICESCOPE]] {{.+}} [[B]] ; CHECK-NEXT: OpReturnValue [[R]] ; CHECK-NEXT: OpFunctionEnd define i32 @test_or(i32* %ptr, i32 %val) { @@ -123,7 +122,7 @@ define i32 @test_or(i32* %ptr, i32 %val) { ; CHECK-NEXT: [[A:%.*]] = OpFunctionParameter [[PtrI32Ty]] ; CHECK-NEXT: [[B:%.*]] = OpFunctionParameter [[I32Ty]] ; CHECK-NEXT: OpLabel -; CHECK-NEXT: [[R:%.*]] = OpAtomicXor [[I32Ty]] [[A]] [[SCOPE]] [[RELAXED]] [[B]] +; 
CHECK-NEXT: [[R:%.*]] = OpAtomicXor [[I32Ty]] [[A]] [[CROSSDEVICESCOPE]] {{.+}} [[B]] ; CHECK-NEXT: OpReturnValue [[R]] ; CHECK-NEXT: OpFunctionEnd define i32 @test_xor(i32* %ptr, i32 %val) { @@ -135,10 +134,10 @@ define i32 @test_xor(i32* %ptr, i32 %val) { ; CHECK-NEXT: [[Arg1:%.*]] = OpFunctionParameter [[PtrI64Ty]] ; CHECK-NEXT: [[Arg2:%.*]] = OpFunctionParameter [[I64Ty]] ; CHECK-NEXT: OpLabel -; CHECK-NEXT: OpAtomicSMin [[I64Ty]] [[Arg1]] [[SCOPE]] [[RELAXED]] [[Arg2]] -; CHECK-NEXT: OpAtomicSMax [[I64Ty]] [[Arg1]] [[SCOPE]] [[RELAXED]] [[Arg2]] -; CHECK-NEXT: OpAtomicUMin [[I64Ty]] [[Arg1]] [[SCOPE]] [[RELAXED]] [[Arg2]] -; CHECK-NEXT: OpAtomicUMax [[I64Ty]] [[Arg1]] [[SCOPE]] [[RELAXED]] [[Arg2]] +; CHECK-NEXT: OpAtomicSMin [[I64Ty]] [[Arg1]] [[DEVICESCOPE]] {{.+}} [[Arg2]] +; CHECK-NEXT: OpAtomicSMax [[I64Ty]] [[Arg1]] [[DEVICESCOPE]] {{.+}} [[Arg2]] +; CHECK-NEXT: OpAtomicUMin [[I64Ty]] [[Arg1]] [[DEVICESCOPE]] {{.+}} [[Arg2]] +; CHECK-NEXT: OpAtomicUMax [[I64Ty]] [[Arg1]] [[DEVICESCOPE]] {{.+}} [[Arg2]] ; CHECK-NEXT: OpReturn ; CHECK-NEXT: OpFunctionEnd define dso_local spir_kernel void @test_wrappers(ptr addrspace(4) %arg, i64 %val) { diff --git a/llvm/test/CodeGen/SPIRV/instructions/atomic_acqrel.ll b/llvm/test/CodeGen/SPIRV/instructions/atomic_acqrel.ll index 07d1a5cf662ec..4d5aca6d404de 100644 --- a/llvm/test/CodeGen/SPIRV/instructions/atomic_acqrel.ll +++ b/llvm/test/CodeGen/SPIRV/instructions/atomic_acqrel.ll @@ -13,8 +13,8 @@ ; CHECK-DAG: [[I32Ty:%.*]] = OpTypeInt 32 0 ; CHECK-DAG: [[PtrI32Ty:%.*]] = OpTypePointer Function [[I32Ty]] -;; Device scope is encoded with constant 1 -; CHECK-DAG: [[SCOPE:%.*]] = OpConstant [[I32Ty]] 1 +;; AllSvmDevices scope is encoded with constant 0 +; CHECK-DAG: [[SCOPE:%.*]] = OpConstantNull [[I32Ty]] ;; "acq_rel" maps to the constant 8 ; CHECK-DAG: [[ACQREL:%.*]] = OpConstant [[I32Ty]] 8 diff --git a/llvm/test/CodeGen/SPIRV/instructions/atomic_seq.ll b/llvm/test/CodeGen/SPIRV/instructions/atomic_seq.ll index 4078ffe1a10b8..9fd3d8e95b5f1 100644 --- a/llvm/test/CodeGen/SPIRV/instructions/atomic_seq.ll +++ b/llvm/test/CodeGen/SPIRV/instructions/atomic_seq.ll @@ -13,8 +13,8 @@ ; CHECK-DAG: [[I32Ty:%.*]] = OpTypeInt 32 0 ; CHECK-DAG: [[PtrI32Ty:%.*]] = OpTypePointer Function [[I32Ty]] -;; Device scope is encoded with constant 1 -; CHECK-DAG: [[SCOPE:%.*]] = OpConstant [[I32Ty]] 1 +;; AllSvmDevices scope is encoded with constant 0 +; CHECK-DAG: [[SCOPE:%.*]] = OpConstantNull [[I32Ty]] ;; "sequentially consistent" maps to constant 16 ; CHECK-DAG: [[SEQ:%.*]] = OpConstant [[I32Ty]] 16 diff --git a/llvm/test/CodeGen/SPIRV/scoped_atomicrmw.ll b/llvm/test/CodeGen/SPIRV/scoped_atomicrmw.ll new file mode 100644 index 0000000000000..130db18534832 --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/scoped_atomicrmw.ll @@ -0,0 +1,163 @@ +; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv64-unknown-unknown %s -o - | FileCheck %s +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv64-unknown-unknown %s -o - -filetype=obj | spirv-val %} + +; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv32-unknown-unknown %s -o - | FileCheck %s +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv32-unknown-unknown %s -o - -filetype=obj | spirv-val %} + +; CHECK: %[[#Int:]] = OpTypeInt 32 0 +; CHECK-DAG: %[[#Float:]] = OpTypeFloat 32 +; CHECK-DAG: %[[#Scope_CrossDevice:]] = OpConstant %[[#Int]] 0 +; CHECK-DAG: %[[#Value:]] = OpConstant %[[#Int]] 42 +; CHECK-DAG: %[[#FPValue:]] = OpConstant %[[#Float]] 42 +; CHECK-DAG: %[[#Scope_Invocation:]] = OpConstant %[[#Int]] 4 +; CHECK-DAG: 
%[[#MemSem_SeqCst:]] = OpConstant %[[#Int]] 16 +; CHECK-DAG: %[[#Scope_Subgroup:]] = OpConstant %[[#Int]] 3 +; CHECK-DAG: %[[#Scope_Workgroup:]] = OpConstant %[[#Int]] 2 +; CHECK-DAG: %[[#Scope_Device:]] = OpConstant %[[#Int]] 1 +; CHECK-DAG: %[[#PointerType:]] = OpTypePointer CrossWorkgroup %[[#Int]] +; CHECK-DAG: %[[#FPPointerType:]] = OpTypePointer CrossWorkgroup %[[#Float]] +; CHECK-DAG: %[[#Pointer:]] = OpVariable %[[#PointerType]] CrossWorkgroup +; CHECK-DAG: %[[#FPPointer:]] = OpVariable %[[#FPPointerType]] CrossWorkgroup + +@ui = common dso_local addrspace(1) global i32 0, align 4 +@f = common dso_local local_unnamed_addr addrspace(1) global float 0.000000e+00, align 4 + +define dso_local spir_func void @test_singlethread_atomicrmw() local_unnamed_addr { +entry: + %0 = atomicrmw xchg i32 addrspace(1)* @ui, i32 42 syncscope("singlethread") seq_cst + ; CHECK: %[[#]] = OpAtomicExchange %[[#Int]] %[[#Pointer:]] %[[#Scope_Invocation:]] %[[#MemSem_SeqCst:]] %[[#Value:]] + %1 = atomicrmw xchg float addrspace(1)* @f, float 42.000000e+00 syncscope("singlethread") seq_cst + ; CHECK: %[[#]] = OpAtomicExchange %[[#Float:]] %[[#FPPointer:]] %[[#Scope_Invocation:]] %[[#MemSem_SeqCst:]] %[[#FPValue:]] + %2 = atomicrmw add i32 addrspace(1)* @ui, i32 42 syncscope("singlethread") seq_cst + ; CHECK: %[[#]] = OpAtomicIAdd %[[#Int]] %[[#Pointer:]] %[[#Scope_Invocation:]] %[[#MemSem_SeqCst:]] %[[#Value:]] + %3 = atomicrmw sub i32 addrspace(1)* @ui, i32 42 syncscope("singlethread") seq_cst + ; CHECK: %[[#]] = OpAtomicISub %[[#Int]] %[[#Pointer:]] %[[#Scope_Invocation:]] %[[#MemSem_SeqCst:]] %[[#Value:]] + %4 = atomicrmw or i32 addrspace(1)* @ui, i32 42 syncscope("singlethread") seq_cst + ; CHECK: %[[#]] = OpAtomicOr %[[#Int]] %[[#Pointer:]] %[[#Scope_Invocation:]] %[[#MemSem_SeqCst:]] %[[#Value:]] + %5 = atomicrmw xor i32 addrspace(1)* @ui, i32 42 syncscope("singlethread") seq_cst + ; CHECK: %[[#]] = OpAtomicXor %[[#Int]] %[[#Pointer:]] %[[#Scope_Invocation:]] %[[#MemSem_SeqCst:]] %[[#Value:]] + %6 = atomicrmw and i32 addrspace(1)* @ui, i32 42 syncscope("singlethread") seq_cst + ; CHECK: %[[#]] = OpAtomicAnd %[[#Int]] %[[#Pointer:]] %[[#Scope_Invocation:]] %[[#MemSem_SeqCst:]] %[[#Value:]] + %7 = atomicrmw max i32 addrspace(1)* @ui, i32 42 syncscope("singlethread") seq_cst + ; CHECK: %[[#]] = OpAtomicSMax %[[#Int]] %[[#Pointer:]] %[[#Scope_Invocation:]] %[[#MemSem_SeqCst:]] %[[#Value:]] + %8 = atomicrmw min i32 addrspace(1)* @ui, i32 42 syncscope("singlethread") seq_cst + ; CHECK: %[[#]] = OpAtomicSMin %[[#Int]] %[[#Pointer:]] %[[#Scope_Invocation:]] %[[#MemSem_SeqCst:]] %[[#Value:]] + %9 = atomicrmw umax i32 addrspace(1)* @ui, i32 42 syncscope("singlethread") seq_cst + ; CHECK: %[[#]] = OpAtomicUMax %[[#Int]] %[[#Pointer:]] %[[#Scope_Invocation:]] %[[#MemSem_SeqCst:]] %[[#Value:]] + %10 = atomicrmw umin i32 addrspace(1)* @ui, i32 42 syncscope("singlethread") seq_cst + ; CHECK: %[[#]] = OpAtomicUMin %[[#Int]] %[[#Pointer:]] %[[#Scope_Invocation:]] %[[#MemSem_SeqCst:]] %[[#Value:]] + + ret void +} + +define dso_local spir_func void @test_subgroup_atomicrmw() local_unnamed_addr { +entry: + %0 = atomicrmw xchg i32 addrspace(1)* @ui, i32 42 syncscope("subgroup") seq_cst + ; CHECK: %[[#]] = OpAtomicExchange %[[#Int]] %[[#Pointer:]] %[[#Scope_Subgroup:]] %[[#MemSem_SeqCst:]] %[[#Value:]] + %1 = atomicrmw xchg float addrspace(1)* @f, float 42.000000e+00 syncscope("subgroup") seq_cst + ; CHECK: %[[#]] = OpAtomicExchange %[[#Float:]] %[[#FPPointer:]] %[[#Scope_Subgroup:]] %[[#MemSem_SeqCst:]] %[[#FPValue:]] + %2 
= atomicrmw add i32 addrspace(1)* @ui, i32 42 syncscope("subgroup") seq_cst + ; CHECK: %[[#]] = OpAtomicIAdd %[[#Int]] %[[#Pointer:]] %[[#Scope_Subgroup:]] %[[#MemSem_SeqCst:]] %[[#Value:]] + %3 = atomicrmw sub i32 addrspace(1)* @ui, i32 42 syncscope("subgroup") seq_cst + ; CHECK: %[[#]] = OpAtomicISub %[[#Int]] %[[#Pointer:]] %[[#Scope_Subgroup:]] %[[#MemSem_SeqCst:]] %[[#Value:]] + %4 = atomicrmw or i32 addrspace(1)* @ui, i32 42 syncscope("subgroup") seq_cst + ; CHECK: %[[#]] = OpAtomicOr %[[#Int]] %[[#Pointer:]] %[[#Scope_Subgroup:]] %[[#MemSem_SeqCst:]] %[[#Value:]] + %5 = atomicrmw xor i32 addrspace(1)* @ui, i32 42 syncscope("subgroup") seq_cst + ; CHECK: %[[#]] = OpAtomicXor %[[#Int]] %[[#Pointer:]] %[[#Scope_Subgroup:]] %[[#MemSem_SeqCst:]] %[[#Value:]] + %6 = atomicrmw and i32 addrspace(1)* @ui, i32 42 syncscope("subgroup") seq_cst + ; CHECK: %[[#]] = OpAtomicAnd %[[#Int]] %[[#Pointer:]] %[[#Scope_Subgroup:]] %[[#MemSem_SeqCst:]] %[[#Value:]] + %7 = atomicrmw max i32 addrspace(1)* @ui, i32 42 syncscope("subgroup") seq_cst + ; CHECK: %[[#]] = OpAtomicSMax %[[#Int]] %[[#Pointer:]] %[[#Scope_Subgroup:]] %[[#MemSem_SeqCst:]] %[[#Value:]] + %8 = atomicrmw min i32 addrspace(1)* @ui, i32 42 syncscope("subgroup") seq_cst + ; CHECK: %[[#]] = OpAtomicSMin %[[#Int]] %[[#Pointer:]] %[[#Scope_Subgroup:]] %[[#MemSem_SeqCst:]] %[[#Value:]] + %9 = atomicrmw umax i32 addrspace(1)* @ui, i32 42 syncscope("subgroup") seq_cst + ; CHECK: %[[#]] = OpAtomicUMax %[[#Int]] %[[#Pointer:]] %[[#Scope_Subgroup:]] %[[#MemSem_SeqCst:]] %[[#Value:]] + %10 = atomicrmw umin i32 addrspace(1)* @ui, i32 42 syncscope("subgroup") seq_cst + ; CHECK: %[[#]] = OpAtomicUMin %[[#Int]] %[[#Pointer:]] %[[#Scope_Subgroup:]] %[[#MemSem_SeqCst:]] %[[#Value:]] + + ret void +} + +define dso_local spir_func void @test_workgroup_atomicrmw() local_unnamed_addr { +entry: + %0 = atomicrmw xchg i32 addrspace(1)* @ui, i32 42 syncscope("workgroup") seq_cst + ; CHECK: %[[#]] = OpAtomicExchange %[[#Int]] %[[#Pointer:]] %[[#Scope_Workgroup:]] %[[#MemSem_SeqCst:]] %[[#Value:]] + %1 = atomicrmw xchg float addrspace(1)* @f, float 42.000000e+00 syncscope("workgroup") seq_cst + ; CHECK: %[[#]] = OpAtomicExchange %[[#Float:]] %[[#FPPointer:]] %[[#Scope_Workgroup:]] %[[#MemSem_SeqCst:]] %[[#FPValue:]] + %2 = atomicrmw add i32 addrspace(1)* @ui, i32 42 syncscope("workgroup") seq_cst + ; CHECK: %[[#]] = OpAtomicIAdd %[[#Int]] %[[#Pointer:]] %[[#Scope_Workgroup:]] %[[#MemSem_SeqCst:]] %[[#Value:]] + %3 = atomicrmw sub i32 addrspace(1)* @ui, i32 42 syncscope("workgroup") seq_cst + ; CHECK: %[[#]] = OpAtomicISub %[[#Int]] %[[#Pointer:]] %[[#Scope_Workgroup:]] %[[#MemSem_SeqCst:]] %[[#Value:]] + %4 = atomicrmw or i32 addrspace(1)* @ui, i32 42 syncscope("workgroup") seq_cst + ; CHECK: %[[#]] = OpAtomicOr %[[#Int]] %[[#Pointer:]] %[[#Scope_Workgroup:]] %[[#MemSem_SeqCst:]] %[[#Value:]] + %5 = atomicrmw xor i32 addrspace(1)* @ui, i32 42 syncscope("workgroup") seq_cst + ; CHECK: %[[#]] = OpAtomicXor %[[#Int]] %[[#Pointer:]] %[[#Scope_Workgroup:]] %[[#MemSem_SeqCst:]] %[[#Value:]] + %6 = atomicrmw and i32 addrspace(1)* @ui, i32 42 syncscope("workgroup") seq_cst + ; CHECK: %[[#]] = OpAtomicAnd %[[#Int]] %[[#Pointer:]] %[[#Scope_Workgroup:]] %[[#MemSem_SeqCst:]] %[[#Value:]] + %7 = atomicrmw max i32 addrspace(1)* @ui, i32 42 syncscope("workgroup") seq_cst + ; CHECK: %[[#]] = OpAtomicSMax %[[#Int]] %[[#Pointer:]] %[[#Scope_Workgroup:]] %[[#MemSem_SeqCst:]] %[[#Value:]] + %8 = atomicrmw min i32 addrspace(1)* @ui, i32 42 syncscope("workgroup") seq_cst + ; CHECK: 
%[[#]] = OpAtomicSMin %[[#Int]] %[[#Pointer:]] %[[#Scope_Workgroup:]] %[[#MemSem_SeqCst:]] %[[#Value:]] + %9 = atomicrmw umax i32 addrspace(1)* @ui, i32 42 syncscope("workgroup") seq_cst + ; CHECK: %[[#]] = OpAtomicUMax %[[#Int]] %[[#Pointer:]] %[[#Scope_Workgroup:]] %[[#MemSem_SeqCst:]] %[[#Value:]] + %10 = atomicrmw umin i32 addrspace(1)* @ui, i32 42 syncscope("workgroup") seq_cst + ; CHECK: %[[#]] = OpAtomicUMin %[[#Int]] %[[#Pointer:]] %[[#Scope_Workgroup:]] %[[#MemSem_SeqCst:]] %[[#Value:]] + + ret void +} + +define dso_local spir_func void @test_device_atomicrmw() local_unnamed_addr { +entry: + %0 = atomicrmw xchg i32 addrspace(1)* @ui, i32 42 syncscope("device") seq_cst + ; CHECK: %[[#]] = OpAtomicExchange %[[#Int]] %[[#Pointer:]] %[[#Scope_Device:]] %[[#MemSem_SeqCst:]] %[[#Value:]] + %1 = atomicrmw xchg float addrspace(1)* @f, float 42.000000e+00 syncscope("device") seq_cst + ; CHECK: %[[#]] = OpAtomicExchange %[[#Float:]] %[[#FPPointer:]] %[[#Scope_Device:]] %[[#MemSem_SeqCst:]] %[[#FPValue:]] + %2 = atomicrmw add i32 addrspace(1)* @ui, i32 42 syncscope("device") seq_cst + ; CHECK: %[[#]] = OpAtomicIAdd %[[#Int]] %[[#Pointer:]] %[[#Scope_Device:]] %[[#MemSem_SeqCst:]] %[[#Value:]] + %3 = atomicrmw sub i32 addrspace(1)* @ui, i32 42 syncscope("device") seq_cst + ; CHECK: %[[#]] = OpAtomicISub %[[#Int]] %[[#Pointer:]] %[[#Scope_Device:]] %[[#MemSem_SeqCst:]] %[[#Value:]] + %4 = atomicrmw or i32 addrspace(1)* @ui, i32 42 syncscope("device") seq_cst + ; CHECK: %[[#]] = OpAtomicOr %[[#Int]] %[[#Pointer:]] %[[#Scope_Device:]] %[[#MemSem_SeqCst:]] %[[#Value:]] + %5 = atomicrmw xor i32 addrspace(1)* @ui, i32 42 syncscope("device") seq_cst + ; CHECK: %[[#]] = OpAtomicXor %[[#Int]] %[[#Pointer:]] %[[#Scope_Device:]] %[[#MemSem_SeqCst:]] %[[#Value:]] + %6 = atomicrmw and i32 addrspace(1)* @ui, i32 42 syncscope("device") seq_cst + ; CHECK: %[[#]] = OpAtomicAnd %[[#Int]] %[[#Pointer:]] %[[#Scope_Device:]] %[[#MemSem_SeqCst:]] %[[#Value:]] + %7 = atomicrmw max i32 addrspace(1)* @ui, i32 42 syncscope("device") seq_cst + ; CHECK: %[[#]] = OpAtomicSMax %[[#Int]] %[[#Pointer:]] %[[#Scope_Device:]] %[[#MemSem_SeqCst:]] %[[#Value:]] + %8 = atomicrmw min i32 addrspace(1)* @ui, i32 42 syncscope("device") seq_cst + ; CHECK: %[[#]] = OpAtomicSMin %[[#Int]] %[[#Pointer:]] %[[#Scope_Device:]] %[[#MemSem_SeqCst:]] %[[#Value:]] + %9 = atomicrmw umax i32 addrspace(1)* @ui, i32 42 syncscope("device") seq_cst + ; CHECK: %[[#]] = OpAtomicUMax %[[#Int]] %[[#Pointer:]] %[[#Scope_Device:]] %[[#MemSem_SeqCst:]] %[[#Value:]] + %10 = atomicrmw umin i32 addrspace(1)* @ui, i32 42 syncscope("device") seq_cst + ; CHECK: %[[#]] = OpAtomicUMin %[[#Int]] %[[#Pointer:]] %[[#Scope_Device:]] %[[#MemSem_SeqCst:]] %[[#Value:]] + + ret void +} + +define dso_local spir_func void @test_all_svm_devices_atomicrmw() local_unnamed_addr { +entry: + %0 = atomicrmw xchg i32 addrspace(1)* @ui, i32 42 seq_cst + ; CHECK: %[[#]] = OpAtomicExchange %[[#Int]] %[[#Pointer:]] %[[#Scope_CrossDevice:]] %[[#MemSem_SeqCst:]] %[[#Value:]] + %1 = atomicrmw xchg float addrspace(1)* @f, float 42.000000e+00 seq_cst + ; CHECK: %[[#]] = OpAtomicExchange %[[#Float:]] %[[#FPPointer:]] %[[#Scope_CrossDevice:]] %[[#MemSem_SeqCst:]] %[[#FPValue:]] + %2 = atomicrmw add i32 addrspace(1)* @ui, i32 42 seq_cst + ; CHECK: %[[#]] = OpAtomicIAdd %[[#Int]] %[[#Pointer:]] %[[#Scope_CrossDevice:]] %[[#MemSem_SeqCst:]] %[[#Value:]] + %3 = atomicrmw sub i32 addrspace(1)* @ui, i32 42 seq_cst + ; CHECK: %[[#]] = OpAtomicISub %[[#Int]] %[[#Pointer:]] %[[#Scope_CrossDevice:]] 
%[[#MemSem_SeqCst:]] %[[#Value:]] + %4 = atomicrmw or i32 addrspace(1)* @ui, i32 42 seq_cst + ; CHECK: %[[#]] = OpAtomicOr %[[#Int]] %[[#Pointer:]] %[[#Scope_CrossDevice:]] %[[#MemSem_SeqCst:]] %[[#Value:]] + %5 = atomicrmw xor i32 addrspace(1)* @ui, i32 42 seq_cst + ; CHECK: %[[#]] = OpAtomicXor %[[#Int]] %[[#Pointer:]] %[[#Scope_CrossDevice:]] %[[#MemSem_SeqCst:]] %[[#Value:]] + %6 = atomicrmw and i32 addrspace(1)* @ui, i32 42 seq_cst + ; CHECK: %[[#]] = OpAtomicAnd %[[#Int]] %[[#Pointer:]] %[[#Scope_CrossDevice:]] %[[#MemSem_SeqCst:]] %[[#Value:]] + %7 = atomicrmw max i32 addrspace(1)* @ui, i32 42 seq_cst + ; CHECK: %[[#]] = OpAtomicSMax %[[#Int]] %[[#Pointer:]] %[[#Scope_CrossDevice:]] %[[#MemSem_SeqCst:]] %[[#Value:]] + %8 = atomicrmw min i32 addrspace(1)* @ui, i32 42 seq_cst + ; CHECK: %[[#]] = OpAtomicSMin %[[#Int]] %[[#Pointer:]] %[[#Scope_CrossDevice:]] %[[#MemSem_SeqCst:]] %[[#Value:]] + %9 = atomicrmw umax i32 addrspace(1)* @ui, i32 42 seq_cst + ; CHECK: %[[#]] = OpAtomicUMax %[[#Int]] %[[#Pointer:]] %[[#Scope_CrossDevice:]] %[[#MemSem_SeqCst:]] %[[#Value:]] + %10 = atomicrmw umin i32 addrspace(1)* @ui, i32 42 seq_cst + ; CHECK: %[[#]] = OpAtomicUMin %[[#Int]] %[[#Pointer:]] %[[#Scope_CrossDevice:]] %[[#MemSem_SeqCst:]] %[[#Value:]] + + ret void +} From 4494c54326fe33d19c14df851188f867b14ded2a Mon Sep 17 00:00:00 2001 From: Ryan Mansfield Date: Tue, 24 Sep 2024 19:48:27 -0400 Subject: [PATCH 065/212] [lldb] Fix typos in various help messages. (#109851) --- lldb/source/Commands/CommandObjectFrame.cpp | 2 +- lldb/source/Commands/CommandObjectProcess.cpp | 2 +- lldb/source/Commands/CommandObjectScripting.cpp | 2 +- lldb/source/Commands/Options.td | 2 +- lldb/source/Interpreter/CommandInterpreter.cpp | 2 +- lldb/source/Target/TargetProperties.td | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) diff --git a/lldb/source/Commands/CommandObjectFrame.cpp b/lldb/source/Commands/CommandObjectFrame.cpp index 142f96946ed3d..e2203292e71e2 100644 --- a/lldb/source/Commands/CommandObjectFrame.cpp +++ b/lldb/source/Commands/CommandObjectFrame.cpp @@ -1223,7 +1223,7 @@ CommandObjectMultiwordFrame::CommandObjectMultiwordFrame( CommandInterpreter &interpreter) : CommandObjectMultiword(interpreter, "frame", "Commands for selecting and " - "examing the current " + "examining the current " "thread's stack frames.", "frame []") { LoadSubCommand("diagnose", diff --git a/lldb/source/Commands/CommandObjectProcess.cpp b/lldb/source/Commands/CommandObjectProcess.cpp index 5b0f4f66f248b..e7c7d07ad4772 100644 --- a/lldb/source/Commands/CommandObjectProcess.cpp +++ b/lldb/source/Commands/CommandObjectProcess.cpp @@ -1420,7 +1420,7 @@ class CommandObjectProcessStatus : public CommandObjectParsed { PlatformSP platform_sp = process->GetTarget().GetPlatform(); if (!platform_sp) { - result.AppendError("Couldn'retrieve the target's platform"); + result.AppendError("Couldn't retrieve the target's platform"); return; } diff --git a/lldb/source/Commands/CommandObjectScripting.cpp b/lldb/source/Commands/CommandObjectScripting.cpp index 9a1a2b63c7af0..1f8ee0a9554ec 100644 --- a/lldb/source/Commands/CommandObjectScripting.cpp +++ b/lldb/source/Commands/CommandObjectScripting.cpp @@ -254,7 +254,7 @@ CommandObjectMultiwordScripting::CommandObjectMultiwordScripting( CommandInterpreter &interpreter) : CommandObjectMultiword( interpreter, "scripting", - "Commands for operating on the scripting functionnalities.", + "Commands for operating on the scripting functionalities.", "scripting []") { LoadSubCommand("run", 
CommandObjectSP(new CommandObjectScriptingRun(interpreter))); diff --git a/lldb/source/Commands/Options.td b/lldb/source/Commands/Options.td index df906e9d7c808..4276d9e7f9c8b 100644 --- a/lldb/source/Commands/Options.td +++ b/lldb/source/Commands/Options.td @@ -1199,7 +1199,7 @@ let Command = "thread trace dump instructions" in { def thread_trace_dump_instruction_only_events : Option<"only-events", "E">, Group<1>, Desc<"Dump only the events that happened during the execution of the " - "target. No instrutions are dumped.">; + "target. No instructions are dumped.">; def thread_trace_dump_instructions_continue: Option<"continue", "C">, Group<1>, Desc<"Continue dumping instructions right where the previous invocation of " diff --git a/lldb/source/Interpreter/CommandInterpreter.cpp b/lldb/source/Interpreter/CommandInterpreter.cpp index b93f47a8a8d5e..acd592c3bd2db 100644 --- a/lldb/source/Interpreter/CommandInterpreter.cpp +++ b/lldb/source/Interpreter/CommandInterpreter.cpp @@ -797,7 +797,7 @@ void CommandInterpreter::LoadCommandDictionary() { new CommandObjectRegexCommand( *this, "gdb-remote", "Connect to a process via remote GDB server.\n" - "If no host is specifed, localhost is assumed.\n" + "If no host is specified, localhost is assumed.\n" "gdb-remote is an abbreviation for 'process connect --plugin " "gdb-remote connect://:'\n", "gdb-remote [:]", 0, false)); diff --git a/lldb/source/Target/TargetProperties.td b/lldb/source/Target/TargetProperties.td index 0f68deb543f90..fb61478fb752d 100644 --- a/lldb/source/Target/TargetProperties.td +++ b/lldb/source/Target/TargetProperties.td @@ -235,7 +235,7 @@ let Definition = "process" in { def DisableLangRuntimeUnwindPlans: Property<"disable-language-runtime-unwindplans", "Boolean">, Global, DefaultFalse, - Desc<"If true, language runtime augmented/overidden backtraces will not be used when printing a stack trace.">; + Desc<"If true, language runtime augmented/overridden backtraces will not be used when printing a stack trace.">; def DetachKeepsStopped: Property<"detach-keeps-stopped", "Boolean">, Global, DefaultFalse, From bde2357f71dfd0ab9921d7e1a5d1ae12d8c44305 Mon Sep 17 00:00:00 2001 From: Rahul Joshi Date: Tue, 24 Sep 2024 16:54:36 -0700 Subject: [PATCH 066/212] [LLVM][TableGen] Rename Option emitter files (#109216) Rename OptXXXEmitter.cpp to OptionXXXEmitter.cpp to have a less ambiguous name, as `Opt` could also mean optimization. --- llvm/include/llvm/Option/OptTable.h | 2 +- llvm/unittests/Option/OptionMarshallingTest.cpp | 2 +- llvm/utils/TableGen/CMakeLists.txt | 4 ++-- .../{OptParserEmitter.cpp => OptionParserEmitter.cpp} | 8 ++++---- .../{OptRSTEmitter.cpp => OptionRSTEmitter.cpp} | 10 +++++----- llvm/utils/gn/secondary/llvm/utils/TableGen/BUILD.gn | 4 ++-- 6 files changed, 15 insertions(+), 15 deletions(-) rename llvm/utils/TableGen/{OptParserEmitter.cpp => OptionParserEmitter.cpp} (98%) rename llvm/utils/TableGen/{OptRSTEmitter.cpp => OptionRSTEmitter.cpp} (89%) diff --git a/llvm/include/llvm/Option/OptTable.h b/llvm/include/llvm/Option/OptTable.h index d8bf292bac21a..8fabc78d81aed 100644 --- a/llvm/include/llvm/Option/OptTable.h +++ b/llvm/include/llvm/Option/OptTable.h @@ -64,7 +64,7 @@ class OptTable { // the program, HelpText is used instead. This cannot use std::vector // because OptTable is used in constexpr contexts. Increase the array sizes // here if you need more entries and adjust the constants in - // OptParserEmitter::EmitHelpTextsForVariants. + // OptionParserEmitter::EmitHelpTextsForVariants. 
std::array, const char *>, 1 /*MaxVisibilityHelp*/> diff --git a/llvm/unittests/Option/OptionMarshallingTest.cpp b/llvm/unittests/Option/OptionMarshallingTest.cpp index 0464e27d5248a..2ec422f1a0984 100644 --- a/llvm/unittests/Option/OptionMarshallingTest.cpp +++ b/llvm/unittests/Option/OptionMarshallingTest.cpp @@ -1,4 +1,4 @@ -//===- unittest/Support/OptionMarshallingTest.cpp - OptParserEmitter tests ===// +//===- OptionMarshallingTest.cpp - OptionParserEmitter tests -================// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/llvm/utils/TableGen/CMakeLists.txt b/llvm/utils/TableGen/CMakeLists.txt index abebb98761d06..ba1e4aa01b48d 100644 --- a/llvm/utils/TableGen/CMakeLists.txt +++ b/llvm/utils/TableGen/CMakeLists.txt @@ -59,8 +59,8 @@ add_tablegen(llvm-tblgen LLVM InstrInfoEmitter.cpp IntrinsicEmitter.cpp MacroFusionPredicatorEmitter.cpp - OptParserEmitter.cpp - OptRSTEmitter.cpp + OptionParserEmitter.cpp + OptionRSTEmitter.cpp PseudoLoweringEmitter.cpp RegisterBankEmitter.cpp RegisterInfoEmitter.cpp diff --git a/llvm/utils/TableGen/OptParserEmitter.cpp b/llvm/utils/TableGen/OptionParserEmitter.cpp similarity index 98% rename from llvm/utils/TableGen/OptParserEmitter.cpp rename to llvm/utils/TableGen/OptionParserEmitter.cpp index 79cbf51514ae5..5ae6f773a3c60 100644 --- a/llvm/utils/TableGen/OptParserEmitter.cpp +++ b/llvm/utils/TableGen/OptionParserEmitter.cpp @@ -1,4 +1,4 @@ -//===- OptParserEmitter.cpp - Table Driven Command Line Parsing -----------===// +//===- OptionParserEmitter.cpp - Table Driven Command Option Line Parsing -===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -247,10 +247,10 @@ static void EmitHelpTextsForVariants( OS << " }})"; } -/// OptParserEmitter - This tablegen backend takes an input .td file +/// OptionParserEmitter - This tablegen backend takes an input .td file /// describing a list of options and emits a data structure for parsing and /// working with those options when given an input command line. -static void EmitOptParser(const RecordKeeper &Records, raw_ostream &OS) { +static void EmitOptionParser(const RecordKeeper &Records, raw_ostream &OS) { // Get the option groups and options. ArrayRef Groups = Records.getAllDerivedDefinitions("OptionGroup"); @@ -572,5 +572,5 @@ static void EmitOptParser(const RecordKeeper &Records, raw_ostream &OS) { OS << "\n"; } -static TableGen::Emitter::Opt X("gen-opt-parser-defs", EmitOptParser, +static TableGen::Emitter::Opt X("gen-opt-parser-defs", EmitOptionParser, "Generate option definitions"); diff --git a/llvm/utils/TableGen/OptRSTEmitter.cpp b/llvm/utils/TableGen/OptionRSTEmitter.cpp similarity index 89% rename from llvm/utils/TableGen/OptRSTEmitter.cpp rename to llvm/utils/TableGen/OptionRSTEmitter.cpp index 16125198a7c38..b798896a80963 100644 --- a/llvm/utils/TableGen/OptRSTEmitter.cpp +++ b/llvm/utils/TableGen/OptionRSTEmitter.cpp @@ -1,4 +1,4 @@ -//===- OptParserEmitter.cpp - Table Driven Command Line Parsing -----------===// +//===- OptionRSTEmitter.cpp - Table Driven Command Line Option Parsing ----===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
@@ -14,9 +14,9 @@
 
 using namespace llvm;
 
-/// OptParserEmitter - This tablegen backend takes an input .td file
-/// describing a list of options and emits a RST man page.
-static void EmitOptRST(const RecordKeeper &Records, raw_ostream &OS) {
+/// This tablegen backend takes an input .td file describing a list of options
+/// and emits a RST man page.
+static void EmitOptionRST(const RecordKeeper &Records, raw_ostream &OS) {
   llvm::StringMap> OptionsByGroup;
   std::vector OptionsWithoutGroup;
 
@@ -97,5 +97,5 @@ static void EmitOptRST(const RecordKeeper &Records, raw_ostream &OS) {
   }
 }
 
-static TableGen::Emitter::Opt X("gen-opt-rst", EmitOptRST,
+static TableGen::Emitter::Opt X("gen-opt-rst", EmitOptionRST,
                                 "Generate option RST");
diff --git a/llvm/utils/gn/secondary/llvm/utils/TableGen/BUILD.gn b/llvm/utils/gn/secondary/llvm/utils/TableGen/BUILD.gn
index 2e11d25767cd0..e93250d7f0b7c 100644
--- a/llvm/utils/gn/secondary/llvm/utils/TableGen/BUILD.gn
+++ b/llvm/utils/gn/secondary/llvm/utils/TableGen/BUILD.gn
@@ -56,8 +56,8 @@ executable("llvm-tblgen") {
     "InstrDocsEmitter.cpp",
     "InstrInfoEmitter.cpp",
     "MacroFusionPredicatorEmitter.cpp",
-    "OptParserEmitter.cpp",
-    "OptRSTEmitter.cpp",
+    "OptionParserEmitter.cpp",
+    "OptionRSTEmitter.cpp",
     "PseudoLoweringEmitter.cpp",
     "RegisterBankEmitter.cpp",
     "RegisterInfoEmitter.cpp",

From b62075e0290b5fd626e49cb901b8bc4ac09fdd60 Mon Sep 17 00:00:00 2001
From: Heejin Ahn
Date: Tue, 24 Sep 2024 16:56:24 -0700
Subject: [PATCH 067/212] [WebAssembly] Allow AsmTypeCheck to detect multiple
 errors in a function (#109705)

This allows multiple errors to be reported within a function, rather
than returning on the first error and not looking at the rest of the
function.

I think the rationale for the previous behavior was that upon
encountering the first error, the value stack was no longer in the
correct state and checking the rest of the function was not very
meaningful. But this patch makes each instruction push the correct
result type upon its completion, so while the possibility of a previous
error affecting later instructions is not zero, I think this can be
more helpful to assembly hand-writers. This also allows us to write
multiple error test cases without creating as many functions. This is
what the Wabt and Binaryen wast checkers/validators do as well.

This also makes sure we return a value (true/false) within the `if` for
each instruction, removing the need for the long `if`-`else if`-`else
if` chain and making them all just `if`s. I also added newlines between
the `if`s, which I feel is easier to read.
---
 .../AsmParser/WebAssemblyAsmTypeCheck.cpp     | 348 ++++++++++--------
 .../AsmParser/WebAssemblyAsmTypeCheck.h       |   2 -
 .../test/MC/WebAssembly/type-checker-errors.s |  22 ++
 3 files changed, 214 insertions(+), 158 deletions(-)

diff --git a/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmTypeCheck.cpp b/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmTypeCheck.cpp
index 4ff19a9cacfb3..8b1e1dca4f847 100644
--- a/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmTypeCheck.cpp
+++ b/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmTypeCheck.cpp
@@ -70,14 +70,9 @@ void WebAssemblyAsmTypeCheck::dumpTypeStack(Twine Msg) {
 }
 
 bool WebAssemblyAsmTypeCheck::typeError(SMLoc ErrorLoc, const Twine &Msg) {
-  // Once you get one type error in a function, it will likely trigger more
-  // which are mostly not helpful.
-  if (TypeErrorThisFunction)
-    return true;
   // If we're currently in unreachable code, we suppress errors completely.
if (Unreachable) return false; - TypeErrorThisFunction = true; dumpTypeStack("current stack: "); return Parser.Error(ErrorLoc, Msg); } @@ -171,11 +166,11 @@ bool WebAssemblyAsmTypeCheck::checkEnd(SMLoc ErrorLoc, bool PopVals) { bool WebAssemblyAsmTypeCheck::checkSig(SMLoc ErrorLoc, const wasm::WasmSignature &Sig) { + bool Error = false; for (auto VT : llvm::reverse(Sig.Params)) - if (popType(ErrorLoc, VT)) - return true; + Error |= popType(ErrorLoc, VT); Stack.insert(Stack.end(), Sig.Returns.begin(), Sig.Returns.end()); - return false; + return Error; } bool WebAssemblyAsmTypeCheck::getSymRef(SMLoc ErrorLoc, const MCOperand &SymOp, @@ -260,17 +255,16 @@ bool WebAssemblyAsmTypeCheck::getSignature(SMLoc ErrorLoc, } bool WebAssemblyAsmTypeCheck::endOfFunction(SMLoc ErrorLoc) { + bool Error = false; // Check the return types. - for (auto RVT : llvm::reverse(ReturnTypes)) { - if (popType(ErrorLoc, RVT)) - return true; - } + for (auto RVT : llvm::reverse(ReturnTypes)) + Error |= popType(ErrorLoc, RVT); if (!Stack.empty()) { return typeError(ErrorLoc, std::to_string(Stack.size()) + " superfluous return values"); } Unreachable = true; - return false; + return Error; } bool WebAssemblyAsmTypeCheck::typeCheck(SMLoc ErrorLoc, const MCInst &Inst, @@ -279,179 +273,221 @@ bool WebAssemblyAsmTypeCheck::typeCheck(SMLoc ErrorLoc, const MCInst &Inst, auto Name = getMnemonic(Opc); dumpTypeStack("typechecking " + Name + ": "); wasm::ValType Type; + if (Name == "local.get") { - if (getLocal(Operands[1]->getStartLoc(), Inst.getOperand(0), Type)) - return true; - Stack.push_back(Type); - } else if (Name == "local.set") { - if (getLocal(Operands[1]->getStartLoc(), Inst.getOperand(0), Type)) - return true; - if (popType(ErrorLoc, Type)) - return true; - } else if (Name == "local.tee") { - if (getLocal(Operands[1]->getStartLoc(), Inst.getOperand(0), Type)) - return true; - if (popType(ErrorLoc, Type)) - return true; - Stack.push_back(Type); - } else if (Name == "global.get") { - if (getGlobal(Operands[1]->getStartLoc(), Inst.getOperand(0), Type)) - return true; - Stack.push_back(Type); - } else if (Name == "global.set") { - if (getGlobal(Operands[1]->getStartLoc(), Inst.getOperand(0), Type)) - return true; - if (popType(ErrorLoc, Type)) - return true; - } else if (Name == "table.get") { - if (getTable(Operands[1]->getStartLoc(), Inst.getOperand(0), Type)) - return true; - if (popType(ErrorLoc, wasm::ValType::I32)) - return true; - Stack.push_back(Type); - } else if (Name == "table.set") { - if (getTable(Operands[1]->getStartLoc(), Inst.getOperand(0), Type)) - return true; - if (popType(ErrorLoc, Type)) - return true; - if (popType(ErrorLoc, wasm::ValType::I32)) - return true; - } else if (Name == "table.size") { - if (getTable(Operands[1]->getStartLoc(), Inst.getOperand(0), Type)) - return true; + if (!getLocal(Operands[1]->getStartLoc(), Inst.getOperand(0), Type)) { + Stack.push_back(Type); + return false; + } + return true; + } + + if (Name == "local.set") { + if (!getLocal(Operands[1]->getStartLoc(), Inst.getOperand(0), Type)) + return popType(ErrorLoc, Type); + return true; + } + + if (Name == "local.tee") { + if (!getLocal(Operands[1]->getStartLoc(), Inst.getOperand(0), Type)) { + bool Error = popType(ErrorLoc, Type); + Stack.push_back(Type); + return Error; + } + return true; + } + + if (Name == "global.get") { + if (!getGlobal(Operands[1]->getStartLoc(), Inst.getOperand(0), Type)) { + Stack.push_back(Type); + return false; + } + return true; + } + + if (Name == "global.set") { + if 
(!getGlobal(Operands[1]->getStartLoc(), Inst.getOperand(0), Type)) + return popType(ErrorLoc, Type); + return true; + } + + if (Name == "table.get") { + bool Error = popType(ErrorLoc, wasm::ValType::I32); + if (!getTable(Operands[1]->getStartLoc(), Inst.getOperand(0), Type)) { + Stack.push_back(Type); + return Error; + } + return true; + } + + if (Name == "table.set") { + bool Error = false; + if (!getTable(Operands[1]->getStartLoc(), Inst.getOperand(0), Type)) + Error |= popType(ErrorLoc, Type); + else + Error = true; + Error |= popType(ErrorLoc, wasm::ValType::I32); + return Error; + } + + if (Name == "table.size") { + bool Error = getTable(Operands[1]->getStartLoc(), Inst.getOperand(0), Type); Stack.push_back(wasm::ValType::I32); - } else if (Name == "table.grow") { - if (getTable(Operands[1]->getStartLoc(), Inst.getOperand(0), Type)) - return true; - if (popType(ErrorLoc, wasm::ValType::I32)) - return true; - if (popType(ErrorLoc, Type)) - return true; + return Error; + } + + if (Name == "table.grow") { + bool Error = popType(ErrorLoc, wasm::ValType::I32); + if (!getTable(Operands[1]->getStartLoc(), Inst.getOperand(0), Type)) + Error |= popType(ErrorLoc, Type); + else + Error = true; Stack.push_back(wasm::ValType::I32); - } else if (Name == "table.fill") { - if (getTable(Operands[1]->getStartLoc(), Inst.getOperand(0), Type)) - return true; - if (popType(ErrorLoc, wasm::ValType::I32)) - return true; - if (popType(ErrorLoc, Type)) - return true; - if (popType(ErrorLoc, wasm::ValType::I32)) - return true; - } else if (Name == "memory.fill") { + return Error; + } + + if (Name == "table.fill") { + bool Error = popType(ErrorLoc, wasm::ValType::I32); + if (!getTable(Operands[1]->getStartLoc(), Inst.getOperand(0), Type)) + Error |= popType(ErrorLoc, Type); + else + Error = true; + Error |= popType(ErrorLoc, wasm::ValType::I32); + return Error; + } + + if (Name == "memory.fill") { Type = Is64 ? wasm::ValType::I64 : wasm::ValType::I32; - if (popType(ErrorLoc, Type)) - return true; - if (popType(ErrorLoc, wasm::ValType::I32)) - return true; - if (popType(ErrorLoc, Type)) - return true; - } else if (Name == "memory.copy") { + bool Error = popType(ErrorLoc, Type); + Error |= popType(ErrorLoc, wasm::ValType::I32); + Error |= popType(ErrorLoc, Type); + return Error; + } + + if (Name == "memory.copy") { Type = Is64 ? wasm::ValType::I64 : wasm::ValType::I32; - if (popType(ErrorLoc, Type)) - return true; - if (popType(ErrorLoc, Type)) - return true; - if (popType(ErrorLoc, Type)) - return true; - } else if (Name == "memory.init") { + bool Error = popType(ErrorLoc, Type); + Error |= popType(ErrorLoc, Type); + Error |= popType(ErrorLoc, Type); + return Error; + } + + if (Name == "memory.init") { Type = Is64 ? 
wasm::ValType::I64 : wasm::ValType::I32; - if (popType(ErrorLoc, wasm::ValType::I32)) - return true; - if (popType(ErrorLoc, wasm::ValType::I32)) - return true; - if (popType(ErrorLoc, Type)) - return true; - } else if (Name == "drop") { - if (popType(ErrorLoc, {})) - return true; - } else if (Name == "try" || Name == "block" || Name == "loop" || - Name == "if") { - if (Name == "if" && popType(ErrorLoc, wasm::ValType::I32)) - return true; + bool Error = popType(ErrorLoc, wasm::ValType::I32); + Error |= popType(ErrorLoc, wasm::ValType::I32); + Error |= popType(ErrorLoc, Type); + return Error; + } + + if (Name == "drop") { + return popType(ErrorLoc, {}); + } + + if (Name == "try" || Name == "block" || Name == "loop" || Name == "if") { if (Name == "loop") BrStack.emplace_back(LastSig.Params.begin(), LastSig.Params.end()); else BrStack.emplace_back(LastSig.Returns.begin(), LastSig.Returns.end()); - } else if (Name == "end_block" || Name == "end_loop" || Name == "end_if" || - Name == "else" || Name == "end_try" || Name == "catch" || - Name == "catch_all" || Name == "delegate") { - if (checkEnd(ErrorLoc, - Name == "else" || Name == "catch" || Name == "catch_all")) + if (Name == "if" && popType(ErrorLoc, wasm::ValType::I32)) return true; + return false; + } + + if (Name == "end_block" || Name == "end_loop" || Name == "end_if" || + Name == "else" || Name == "end_try" || Name == "catch" || + Name == "catch_all" || Name == "delegate") { + bool Error = checkEnd(ErrorLoc, Name == "else" || Name == "catch" || + Name == "catch_all"); Unreachable = false; if (Name == "catch") { const wasm::WasmSignature *Sig = nullptr; - if (getSignature(Operands[1]->getStartLoc(), Inst.getOperand(0), - wasm::WASM_SYMBOL_TYPE_TAG, Sig)) - return true; - // catch instruction pushes values whose types are specified in the tag's - // "params" part - Stack.insert(Stack.end(), Sig->Params.begin(), Sig->Params.end()); + if (!getSignature(Operands[1]->getStartLoc(), Inst.getOperand(0), + wasm::WASM_SYMBOL_TYPE_TAG, Sig)) + // catch instruction pushes values whose types are specified in the + // tag's "params" part + Stack.insert(Stack.end(), Sig->Params.begin(), Sig->Params.end()); + else + Error = true; } - } else if (Name == "br") { + return Error; + } + + if (Name == "br") { const MCOperand &Operand = Inst.getOperand(0); if (!Operand.isImm()) - return false; - if (checkBr(ErrorLoc, static_cast(Operand.getImm()))) - return true; - } else if (Name == "return") { - if (endOfFunction(ErrorLoc)) return true; - } else if (Name == "call_indirect" || Name == "return_call_indirect") { + return checkBr(ErrorLoc, static_cast(Operand.getImm())); + } + + if (Name == "return") { + return endOfFunction(ErrorLoc); + } + + if (Name == "call_indirect" || Name == "return_call_indirect") { // Function value. 
- if (popType(ErrorLoc, wasm::ValType::I32)) - return true; - if (checkSig(ErrorLoc, LastSig)) - return true; + bool Error = popType(ErrorLoc, wasm::ValType::I32); + Error |= checkSig(ErrorLoc, LastSig); if (Name == "return_call_indirect" && endOfFunction(ErrorLoc)) return true; - } else if (Name == "call" || Name == "return_call") { + return Error; + } + + if (Name == "call" || Name == "return_call") { + bool Error = false; const wasm::WasmSignature *Sig = nullptr; - if (getSignature(Operands[1]->getStartLoc(), Inst.getOperand(0), - wasm::WASM_SYMBOL_TYPE_FUNCTION, Sig)) - return true; - if (checkSig(ErrorLoc, *Sig)) - return true; + if (!getSignature(Operands[1]->getStartLoc(), Inst.getOperand(0), + wasm::WASM_SYMBOL_TYPE_FUNCTION, Sig)) + Error |= checkSig(ErrorLoc, *Sig); + else + Error = true; if (Name == "return_call" && endOfFunction(ErrorLoc)) return true; - } else if (Name == "unreachable") { + return Error; + } + + if (Name == "unreachable") { Unreachable = true; - } else if (Name == "ref.is_null") { - if (popRefType(ErrorLoc)) - return true; + return false; + } + + if (Name == "ref.is_null") { + bool Error = popRefType(ErrorLoc); Stack.push_back(wasm::ValType::I32); - } else if (Name == "throw") { + return Error; + } + + if (Name == "throw") { const wasm::WasmSignature *Sig = nullptr; - if (getSignature(Operands[1]->getStartLoc(), Inst.getOperand(0), - wasm::WASM_SYMBOL_TYPE_TAG, Sig)) - return true; - if (checkSig(ErrorLoc, *Sig)) - return true; - } else { - // The current instruction is a stack instruction which doesn't have - // explicit operands that indicate push/pop types, so we get those from - // the register version of the same instruction. - auto RegOpc = WebAssembly::getRegisterOpcode(Opc); - assert(RegOpc != -1 && "Failed to get register version of MC instruction"); - const auto &II = MII.get(RegOpc); - // First pop all the uses off the stack and check them. - for (unsigned I = II.getNumOperands(); I > II.getNumDefs(); I--) { - const auto &Op = II.operands()[I - 1]; - if (Op.OperandType == MCOI::OPERAND_REGISTER) { - auto VT = WebAssembly::regClassToValType(Op.RegClass); - if (popType(ErrorLoc, VT)) - return true; - } - } - // Now push all the defs onto the stack. - for (unsigned I = 0; I < II.getNumDefs(); I++) { - const auto &Op = II.operands()[I]; - assert(Op.OperandType == MCOI::OPERAND_REGISTER && "Register expected"); + if (!getSignature(Operands[1]->getStartLoc(), Inst.getOperand(0), + wasm::WASM_SYMBOL_TYPE_TAG, Sig)) + return checkSig(ErrorLoc, *Sig); + return true; + } + + // The current instruction is a stack instruction which doesn't have + // explicit operands that indicate push/pop types, so we get those from + // the register version of the same instruction. + auto RegOpc = WebAssembly::getRegisterOpcode(Opc); + assert(RegOpc != -1 && "Failed to get register version of MC instruction"); + const auto &II = MII.get(RegOpc); + bool Error = false; + // First pop all the uses off the stack and check them. + for (unsigned I = II.getNumOperands(); I > II.getNumDefs(); I--) { + const auto &Op = II.operands()[I - 1]; + if (Op.OperandType == MCOI::OPERAND_REGISTER) { auto VT = WebAssembly::regClassToValType(Op.RegClass); - Stack.push_back(VT); + Error |= popType(ErrorLoc, VT); } } - return false; + // Now push all the defs onto the stack. 
+ for (unsigned I = 0; I < II.getNumDefs(); I++) { + const auto &Op = II.operands()[I]; + assert(Op.OperandType == MCOI::OPERAND_REGISTER && "Register expected"); + auto VT = WebAssembly::regClassToValType(Op.RegClass); + Stack.push_back(VT); + } + return Error; } } // end namespace llvm diff --git a/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmTypeCheck.h b/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmTypeCheck.h index 2a654d7982510..972162d3e02f4 100644 --- a/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmTypeCheck.h +++ b/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmTypeCheck.h @@ -33,7 +33,6 @@ class WebAssemblyAsmTypeCheck final { SmallVector LocalTypes; SmallVector ReturnTypes; wasm::WasmSignature LastSig; - bool TypeErrorThisFunction = false; bool Unreachable = false; bool Is64; @@ -68,7 +67,6 @@ class WebAssemblyAsmTypeCheck final { BrStack.clear(); LocalTypes.clear(); ReturnTypes.clear(); - TypeErrorThisFunction = false; Unreachable = false; } }; diff --git a/llvm/test/MC/WebAssembly/type-checker-errors.s b/llvm/test/MC/WebAssembly/type-checker-errors.s index e8b8274036a83..3106fe76c8449 100644 --- a/llvm/test/MC/WebAssembly/type-checker-errors.s +++ b/llvm/test/MC/WebAssembly/type-checker-errors.s @@ -93,12 +93,14 @@ global_set_type_mismatch: table_get_expected_expression_operand: .functype table_get_expected_expression_operand () -> () + i32.const 0 # CHECK: :[[@LINE+1]]:13: error: expected expression operand table.get 1 end_function table_get_missing_tabletype: .functype table_get_missing_tabletype () -> () + i32.const 0 # CHECK: :[[@LINE+1]]:13: error: symbol foo: missing .tabletype table.get foo end_function @@ -851,3 +853,23 @@ br_incorrect_func_signature: drop i32.const 1 end_function + +multiple_errors_in_function: + .functype multiple_errors_in_function () -> () +# CHECK: :[[@LINE+2]]:3: error: empty stack while popping i32 +# CHECK: :[[@LINE+1]]:13: error: expected expression operand + table.get 1 + +# CHECK: :[[@LINE+3]]:3: error: empty stack while popping i32 +# CHECK: :[[@LINE+2]]:3: error: empty stack while popping externref +# CHECK: :[[@LINE+1]]:3: error: empty stack while popping i32 + table.fill valid_table + + f32.const 0.0 + ref.null_extern +# CHECK: :[[@LINE+2]]:3: error: popped externref, expected i32 +# CHECK: :[[@LINE+1]]:3: error: popped f32, expected i32 + i32.add + drop + + end_function From b15bd3fc653f061e3a69e1c42a3e5f5256aa1b50 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Valentin=20Clement=20=28=E3=83=90=E3=83=AC=E3=83=B3?= =?UTF-8?q?=E3=82=BF=E3=82=A4=E3=83=B3=20=E3=82=AF=E3=83=AC=E3=83=A1?= =?UTF-8?q?=E3=83=B3=29?= Date: Tue, 24 Sep 2024 17:04:54 -0700 Subject: [PATCH 068/212] [flang][cuda] Add global constructor for allocators registration (#109854) This pass creates the constructor function to call the allocator registration and adds it to the global_ctors. 
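As a rough C++ analogy of what the emitted constructor amounts to once lowered
(a sketch only; the real entry-point name comes from
RTNAME_STRING(CUFRegisterAllocator) and must be provided by the CUDA Fortran
runtime at link time):

    // Declaration of the runtime registration entry point.
    extern "C" void _FortranACUFRegisterAllocator();

    namespace {
    // Static object whose constructor runs before main(), mirroring the
    // llvm.global_ctors entry (priority 0) that the pass emits.
    struct RegisterCUFAllocators {
      RegisterCUFAllocators() { _FortranACUFRegisterAllocator(); }
    } registerCUFAllocators;
    } // namespace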
--- .../flang/Optimizer/Transforms/Passes.h | 1 + .../flang/Optimizer/Transforms/Passes.td | 7 ++ flang/lib/Optimizer/Transforms/CMakeLists.txt | 1 + .../Transforms/CUFAddConstructor.cpp | 75 +++++++++++++++++++ flang/test/Fir/CUDA/cuda-constructor.f90 | 12 +++ 5 files changed, 96 insertions(+) create mode 100644 flang/lib/Optimizer/Transforms/CUFAddConstructor.cpp create mode 100644 flang/test/Fir/CUDA/cuda-constructor.f90 diff --git a/flang/include/flang/Optimizer/Transforms/Passes.h b/flang/include/flang/Optimizer/Transforms/Passes.h index fcfb8677951a2..3b2af3a339810 100644 --- a/flang/include/flang/Optimizer/Transforms/Passes.h +++ b/flang/include/flang/Optimizer/Transforms/Passes.h @@ -39,6 +39,7 @@ namespace fir { #define GEN_PASS_DECL_ASSUMEDRANKOPCONVERSION #define GEN_PASS_DECL_CHARACTERCONVERSION #define GEN_PASS_DECL_CFGCONVERSION +#define GEN_PASS_DECL_CUFADDCONSTRUCTOR #define GEN_PASS_DECL_CUFIMPLICITDEVICEGLOBAL #define GEN_PASS_DECL_CUFOPCONVERSION #define GEN_PASS_DECL_EXTERNALNAMECONVERSION diff --git a/flang/include/flang/Optimizer/Transforms/Passes.td b/flang/include/flang/Optimizer/Transforms/Passes.td index ab98591c911cd..bf75123e85377 100644 --- a/flang/include/flang/Optimizer/Transforms/Passes.td +++ b/flang/include/flang/Optimizer/Transforms/Passes.td @@ -436,4 +436,11 @@ def CufImplicitDeviceGlobal : ]; } +def CUFAddConstructor : Pass<"cuf-add-constructor", "mlir::ModuleOp"> { + let summary = "Add constructor to register CUDA Fortran allocators"; + let dependentDialects = [ + "mlir::func::FuncDialect" + ]; +} + #endif // FLANG_OPTIMIZER_TRANSFORMS_PASSES diff --git a/flang/lib/Optimizer/Transforms/CMakeLists.txt b/flang/lib/Optimizer/Transforms/CMakeLists.txt index b68e3d68b9b83..5e1a0293e63c9 100644 --- a/flang/lib/Optimizer/Transforms/CMakeLists.txt +++ b/flang/lib/Optimizer/Transforms/CMakeLists.txt @@ -9,6 +9,7 @@ add_flang_library(FIRTransforms CompilerGeneratedNames.cpp ConstantArgumentGlobalisation.cpp ControlFlowConverter.cpp + CUFAddConstructor.cpp CufImplicitDeviceGlobal.cpp CufOpConversion.cpp ArrayValueCopy.cpp diff --git a/flang/lib/Optimizer/Transforms/CUFAddConstructor.cpp b/flang/lib/Optimizer/Transforms/CUFAddConstructor.cpp new file mode 100644 index 0000000000000..48620fbc58586 --- /dev/null +++ b/flang/lib/Optimizer/Transforms/CUFAddConstructor.cpp @@ -0,0 +1,75 @@ +//===-- CUFAddConstructor.cpp ---------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "flang/Optimizer/Builder/FIRBuilder.h"
+#include "flang/Optimizer/Dialect/CUF/CUFOps.h"
+#include "flang/Optimizer/Dialect/FIRAttr.h"
+#include "flang/Optimizer/Dialect/FIRDialect.h"
+#include "flang/Optimizer/Dialect/FIROpsSupport.h"
+#include "flang/Runtime/entry-names.h"
+#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
+#include "mlir/Pass/Pass.h"
+#include "llvm/ADT/SmallVector.h"
+
+namespace fir {
+#define GEN_PASS_DEF_CUFADDCONSTRUCTOR
+#include "flang/Optimizer/Transforms/Passes.h.inc"
+} // namespace fir
+
+namespace {
+
+static constexpr llvm::StringRef cudaFortranCtorName{
+    "__cudaFortranConstructor"};
+
+struct CUFAddConstructor
+    : public fir::impl::CUFAddConstructorBase<CUFAddConstructor> {
+
+  void runOnOperation() override {
+    mlir::ModuleOp mod = getOperation();
+    mlir::OpBuilder builder{mod.getBodyRegion()};
+    builder.setInsertionPointToEnd(mod.getBody());
+    mlir::Location loc = mod.getLoc();
+    auto *ctx = mod.getContext();
+    auto voidTy = mlir::LLVM::LLVMVoidType::get(ctx);
+    auto funcTy =
+        mlir::LLVM::LLVMFunctionType::get(voidTy, {}, /*isVarArg=*/false);
+
+    // Symbol reference to CUFRegisterAllocator.
+    builder.setInsertionPointToEnd(mod.getBody());
+    auto registerFuncOp = builder.create<mlir::LLVM::LLVMFuncOp>(
+        loc, RTNAME_STRING(CUFRegisterAllocator), funcTy);
+    registerFuncOp.setVisibility(mlir::SymbolTable::Visibility::Private);
+    auto cufRegisterAllocatorRef = mlir::SymbolRefAttr::get(
+        mod.getContext(), RTNAME_STRING(CUFRegisterAllocator));
+    builder.setInsertionPointToEnd(mod.getBody());
+
+    // Create the constructor function that calls CUFRegisterAllocator.
+    builder.setInsertionPointToEnd(mod.getBody());
+    auto func = builder.create<mlir::LLVM::LLVMFuncOp>(loc, cudaFortranCtorName,
+                                                       funcTy);
+    func.setLinkage(mlir::LLVM::Linkage::Internal);
+    builder.setInsertionPointToStart(func.addEntryBlock(builder));
+    builder.create<mlir::LLVM::CallOp>(loc, funcTy, cufRegisterAllocatorRef);
+    builder.create<mlir::LLVM::ReturnOp>(loc, mlir::ValueRange{});
+
+    // Create the llvm.global_ctors with the function.
+    // TODO: We might want to have a utility that retrieves it if already
+    // created and adds new functions.
+    builder.setInsertionPointToEnd(mod.getBody());
+    llvm::SmallVector<mlir::Attribute> funcs;
+    funcs.push_back(
+        mlir::FlatSymbolRefAttr::get(mod.getContext(), func.getSymName()));
+    llvm::SmallVector<int> priorities;
+    priorities.push_back(0);
+    builder.create<mlir::LLVM::GlobalCtorsOp>(
+        mod.getLoc(), builder.getArrayAttr(funcs),
+        builder.getI32ArrayAttr(priorities));
+  }
+};
+
+} // end anonymous namespace
diff --git a/flang/test/Fir/CUDA/cuda-constructor.f90 b/flang/test/Fir/CUDA/cuda-constructor.f90
new file mode 100644
index 0000000000000..d02350b4f4198
--- /dev/null
+++ b/flang/test/Fir/CUDA/cuda-constructor.f90
@@ -0,0 +1,12 @@
+! RUN: bbc -fcuda -emit-hlfir %s -o - | fir-opt --cuf-add-constructor | FileCheck %s
+
+program main
+  real, device :: ahost(10)
+end
+
+! CHECK: llvm.func @_FortranACUFRegisterAllocator() attributes {sym_visibility = "private"}
+! CHECK-LABEL: llvm.func internal @__cudaFortranConstructor() {
+! CHECK: llvm.call @_FortranACUFRegisterAllocator() : () -> ()
+! CHECK: llvm.return
+! CHECK: }
+! CHECK: llvm.mlir.global_ctors {ctors = [@__cudaFortranConstructor], priorities = [0 : i32]}

From 9ef9acbd4f5f84e1bfceb677a064b724f102554e Mon Sep 17 00:00:00 2001
From: Chris Apple
Date: Tue, 24 Sep 2024 17:08:42 -0700
Subject: [PATCH 069/212] [rtsan] Introduce halt_on_error flag (#109832)

---
 compiler-rt/lib/rtsan/rtsan.cpp          | 19 ++++++++---------
 compiler-rt/lib/rtsan/rtsan_flags.inc    |  3 +--
 compiler-rt/test/rtsan/halt_on_error.cpp | 26 ++++++++++++++++++++++++
 3 files changed, 36 insertions(+), 12 deletions(-)
 create mode 100644 compiler-rt/test/rtsan/halt_on_error.cpp

diff --git a/compiler-rt/lib/rtsan/rtsan.cpp b/compiler-rt/lib/rtsan/rtsan.cpp
index 84e4b8fae1e2f..1e10069f51dd3 100644
--- a/compiler-rt/lib/rtsan/rtsan.cpp
+++ b/compiler-rt/lib/rtsan/rtsan.cpp
@@ -44,10 +44,11 @@ static InitializationState GetInitializationState() {
       atomic_load(&rtsan_initialized, memory_order_acquire));
 }
 
-static auto PrintDiagnosticsAndDieAction(DiagnosticsInfo info) {
+static auto OnViolationAction(DiagnosticsInfo info) {
   return [info]() {
     __rtsan::PrintDiagnostics(info);
-    Die();
+    if (flags().halt_on_error)
+      Die();
   };
 }
 
@@ -105,20 +106,18 @@ __rtsan_notify_intercepted_call(const char *func_name) {
   __rtsan_ensure_initialized();
 
   GET_CALLER_PC_BP;
-  ExpectNotRealtime(
-      GetContextForThisThread(),
-      PrintDiagnosticsAndDieAction(
-          {DiagnosticsInfoType::InterceptedCall, func_name, pc, bp}));
+  ExpectNotRealtime(GetContextForThisThread(),
+                    OnViolationAction({DiagnosticsInfoType::InterceptedCall,
+                                       func_name, pc, bp}));
 }
 
 SANITIZER_INTERFACE_ATTRIBUTE void
 __rtsan_notify_blocking_call(const char *func_name) {
   __rtsan_ensure_initialized();
 
   GET_CALLER_PC_BP;
-  ExpectNotRealtime(
-      GetContextForThisThread(),
-      PrintDiagnosticsAndDieAction(
-          {DiagnosticsInfoType::BlockingCall, func_name, pc, bp}));
+  ExpectNotRealtime(GetContextForThisThread(),
+                    OnViolationAction({DiagnosticsInfoType::BlockingCall,
+                                       func_name, pc, bp}));
 }
 
 } // extern "C"
diff --git a/compiler-rt/lib/rtsan/rtsan_flags.inc b/compiler-rt/lib/rtsan/rtsan_flags.inc
index 93b0294313672..25d62cf0a60fb 100644
--- a/compiler-rt/lib/rtsan/rtsan_flags.inc
+++ b/compiler-rt/lib/rtsan/rtsan_flags.inc
@@ -16,5 +16,4 @@
 // RTSAN_FLAG(Type, Name, DefaultValue, Description)
 // See COMMON_FLAG in sanitizer_flags.inc for more details.
 
-// Example flag, until we get a real one
-// RTSAN_FLAG(bool, halt_on_error, true, "If true, halt the program on error")
+RTSAN_FLAG(bool, halt_on_error, true, "Exit after first reported error.")
diff --git a/compiler-rt/test/rtsan/halt_on_error.cpp b/compiler-rt/test/rtsan/halt_on_error.cpp
new file mode 100644
index 0000000000000..c2ebdf349f371
--- /dev/null
+++ b/compiler-rt/test/rtsan/halt_on_error.cpp
@@ -0,0 +1,26 @@
+// RUN: %clangxx -fsanitize=realtime %s -o %t
+// RUN: %env_rtsan_opts="halt_on_error=true" not %run %t 2>&1 | FileCheck %s
+// RUN: %env_rtsan_opts="halt_on_error=false" %run %t 2>&1 | FileCheck %s --check-prefixes=CHECK-NO-HALT,CHECK
+// UNSUPPORTED: ios
+
+// Intent: Ensure that halt_on_error=false does not exit on the first violation.
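+// With halt_on_error=true (the default), the first report is fatal: only the
+// malloc violation below is printed, and `not %run` expects the non-zero
+// exit. With halt_on_error=false, execution continues past each report, so
+// the free violation is printed as well (the CHECK-NO-HALT lines).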
+ +#include + +void *MallocViolation() { return malloc(10); } + +void FreeViolation(void *Ptr) { free(Ptr); } + +void process() [[clang::nonblocking]] { + void *Ptr = MallocViolation(); + FreeViolation(Ptr); +} + +int main() { + process(); + return 0; + // CHECK: ==ERROR: RealtimeSanitizer + // CHECK-NEXT: {{.*`malloc`.*}} + // CHECK-NO-HALT: ==ERROR: RealtimeSanitizer + // CHECK-NO-HALT-NEXT: {{.*`free`.*}} +} From 4a2d24e814295436d0c8f928897e2c55fe0214a6 Mon Sep 17 00:00:00 2001 From: Nico Weber Date: Tue, 24 Sep 2024 20:22:36 -0400 Subject: [PATCH 070/212] [gn] Reformat build files Ran: git ls-files '*.gn' '*.gni' | xargs llvm/utils/gn/gn.py format No behavior change. --- llvm/utils/gn/build/BUILD.gn | 1 + llvm/utils/gn/build/toolchain/target_flags.gni | 3 ++- llvm/utils/gn/secondary/BUILD.gn | 4 ++-- .../secondary/clang-tools-extra/clang-doc/tool/BUILD.gn | 2 +- llvm/utils/gn/secondary/clang-tools-extra/clangd/BUILD.gn | 2 +- llvm/utils/gn/secondary/clang/test/BUILD.gn | 4 ++-- .../gn/secondary/clang/unittests/InstallAPI/BUILD.gn | 2 +- .../secondary/compiler-rt/lib/sanitizer_common/BUILD.gn | 2 +- llvm/utils/gn/secondary/compiler-rt/test/hwasan/BUILD.gn | 2 +- llvm/utils/gn/secondary/compiler-rt/test/lsan/BUILD.gn | 8 ++++---- llvm/utils/gn/secondary/libcxx/src/BUILD.gn | 6 +++--- llvm/utils/gn/secondary/lld/unittests/AsLibAll/BUILD.gn | 2 +- llvm/utils/gn/secondary/lld/unittests/BUILD.gn | 1 - llvm/utils/gn/secondary/lldb/test/BUILD.gn | 2 +- .../gn/secondary/llvm/include/llvm/TargetParser/BUILD.gn | 2 +- llvm/utils/gn/secondary/llvm/lib/CodeGenTypes/BUILD.gn | 1 - llvm/utils/gn/secondary/llvm/lib/DebugInfo/BTF/BUILD.gn | 2 +- .../secondary/llvm/lib/Target/WebAssembly/Utils/BUILD.gn | 4 +--- llvm/utils/gn/secondary/llvm/tools/llc/BUILD.gn | 2 +- llvm/utils/gn/secondary/llvm/tools/llvm-dwp/BUILD.gn | 2 +- .../gn/secondary/llvm/tools/llvm-libtool-darwin/BUILD.gn | 2 +- llvm/utils/gn/secondary/llvm/tools/llvm-ml/BUILD.gn | 2 +- llvm/utils/gn/secondary/llvm/tools/sancov/BUILD.gn | 2 +- .../llvm/unittests/Transforms/Instrumentation/BUILD.gn | 2 +- llvm/utils/gn/secondary/llvm/utils/TableGen/BUILD.gn | 2 +- 25 files changed, 31 insertions(+), 33 deletions(-) diff --git a/llvm/utils/gn/build/BUILD.gn b/llvm/utils/gn/build/BUILD.gn index 27f95bb5a49f1..0b0f62721d374 100644 --- a/llvm/utils/gn/build/BUILD.gn +++ b/llvm/utils/gn/build/BUILD.gn @@ -186,6 +186,7 @@ config("compiler_defaults") { if (!is_clang) { # expand __VA_ARGS__ in "OPTION(...) LLVM_MAKE_OPT_ID(__VA_ARGS__)" cflags += [ "/Zc:preprocessor" ] + # cl.exe doesn't set __cplusplus correctly by default. # clang-cl gets it right by default, so don't needlessly add the flag there. cflags_cc += [ "/Zc:__cplusplus" ] diff --git a/llvm/utils/gn/build/toolchain/target_flags.gni b/llvm/utils/gn/build/toolchain/target_flags.gni index cbfa22966b48f..50d31a3da85fc 100644 --- a/llvm/utils/gn/build/toolchain/target_flags.gni +++ b/llvm/utils/gn/build/toolchain/target_flags.gni @@ -45,9 +45,10 @@ if (current_os == "android") { target_flags += [ "-isysroot", rebase_path(mac_sdk_path, root_build_dir), + # TODO(lgrey): We should be getting this from `compiler_defaults`. Why # aren't we? 
- "-mmacos-version-min=$mac_deployment_target", + "-mmacos-version-min=$mac_deployment_target", ] } } else if (current_os == "baremetal") { diff --git a/llvm/utils/gn/secondary/BUILD.gn b/llvm/utils/gn/secondary/BUILD.gn index a17a2fdb7a3ca..7f6b4cb43239f 100644 --- a/llvm/utils/gn/secondary/BUILD.gn +++ b/llvm/utils/gn/secondary/BUILD.gn @@ -21,12 +21,12 @@ group("default") { "//libcxxabi", ] } - if (current_os == "linux" || current_os == "win" || current_os=="mac") { + if (current_os == "linux" || current_os == "win" || current_os == "mac") { deps += [ "//compiler-rt/test/asan" ] } if (current_os == "linux" || current_os == "mac") { - deps += [ "//compiler-rt/test/lsan"] + deps += [ "//compiler-rt/test/lsan" ] } if (current_os == "linux" || current_os == "android") { diff --git a/llvm/utils/gn/secondary/clang-tools-extra/clang-doc/tool/BUILD.gn b/llvm/utils/gn/secondary/clang-tools-extra/clang-doc/tool/BUILD.gn index b224df093c774..47dd70e629fb3 100644 --- a/llvm/utils/gn/secondary/clang-tools-extra/clang-doc/tool/BUILD.gn +++ b/llvm/utils/gn/secondary/clang-tools-extra/clang-doc/tool/BUILD.gn @@ -1,7 +1,7 @@ copy("assets") { sources = [ - "../assets/index.js", "../assets/clang-doc-default-stylesheet.css", + "../assets/index.js", ] outputs = [ "$root_build_dir/share/clang-doc/{{source_file_part}}" ] } diff --git a/llvm/utils/gn/secondary/clang-tools-extra/clangd/BUILD.gn b/llvm/utils/gn/secondary/clang-tools-extra/clangd/BUILD.gn index a116c0abe0b2c..c6b45efef2990 100644 --- a/llvm/utils/gn/secondary/clang-tools-extra/clangd/BUILD.gn +++ b/llvm/utils/gn/secondary/clang-tools-extra/clangd/BUILD.gn @@ -56,9 +56,9 @@ static_library("clangd") { "//clang/lib/Serialization", "//clang/lib/Tooling", "//clang/lib/Tooling/Core", + "//clang/lib/Tooling/DependencyScanning", "//clang/lib/Tooling/Inclusions", "//clang/lib/Tooling/Inclusions/Stdlib", - "//clang/lib/Tooling/DependencyScanning", "//clang/lib/Tooling/Refactoring", "//clang/lib/Tooling/Syntax", "//llvm/lib/Support", diff --git a/llvm/utils/gn/secondary/clang/test/BUILD.gn b/llvm/utils/gn/secondary/clang/test/BUILD.gn index 1d5b8025a12ac..97610d4c95749 100644 --- a/llvm/utils/gn/secondary/clang/test/BUILD.gn +++ b/llvm/utils/gn/secondary/clang/test/BUILD.gn @@ -124,13 +124,13 @@ write_lit_config("lit_site_cfg") { "CMAKE_LIBRARY_OUTPUT_DIRECTORY=" + rebase_path("$root_out_dir/bin", dir), "LLVM_LIT_ERRC_MESSAGES=no such file or directory;is a directory;" + "invalid argument;permission denied", - "PERL_EXECUTABLE=" + "PERL_EXECUTABLE=", ] } else { extra_values += [ "CMAKE_LIBRARY_OUTPUT_DIRECTORY=" + rebase_path("$root_out_dir/lib", dir), "LLVM_LIT_ERRC_MESSAGES=", - "PERL_EXECUTABLE=/usr/bin/perl" + "PERL_EXECUTABLE=/usr/bin/perl", ] } diff --git a/llvm/utils/gn/secondary/clang/unittests/InstallAPI/BUILD.gn b/llvm/utils/gn/secondary/clang/unittests/InstallAPI/BUILD.gn index e27659457474f..b8bf438bfdb48 100644 --- a/llvm/utils/gn/secondary/clang/unittests/InstallAPI/BUILD.gn +++ b/llvm/utils/gn/secondary/clang/unittests/InstallAPI/BUILD.gn @@ -7,7 +7,7 @@ unittest("InstallAPITests") { "//llvm/lib/Testing/Support", ] sources = [ - "HeaderFileTest.cpp", "FileListTest.cpp", + "HeaderFileTest.cpp", ] } diff --git a/llvm/utils/gn/secondary/compiler-rt/lib/sanitizer_common/BUILD.gn b/llvm/utils/gn/secondary/compiler-rt/lib/sanitizer_common/BUILD.gn index 450c419d25fe8..e398119fb38a9 100644 --- a/llvm/utils/gn/secondary/compiler-rt/lib/sanitizer_common/BUILD.gn +++ b/llvm/utils/gn/secondary/compiler-rt/lib/sanitizer_common/BUILD.gn @@ -167,9 +167,9 
@@ source_set("sources") { "sanitizer_vector.h", "sanitizer_win.cpp", "sanitizer_win.h", + "sanitizer_win_defs.h", "sanitizer_win_interception.cpp", "sanitizer_win_interception.h", - "sanitizer_win_defs.h", "sanitizer_win_thunk_interception.h", ] } diff --git a/llvm/utils/gn/secondary/compiler-rt/test/hwasan/BUILD.gn b/llvm/utils/gn/secondary/compiler-rt/test/hwasan/BUILD.gn index 59ed1d1480967..7bdf9c2b994c9 100644 --- a/llvm/utils/gn/secondary/compiler-rt/test/hwasan/BUILD.gn +++ b/llvm/utils/gn/secondary/compiler-rt/test/hwasan/BUILD.gn @@ -37,8 +37,8 @@ if (current_toolchain != host_toolchain) { ":lit_site_cfg", "//compiler-rt/include($host_toolchain)", "//compiler-rt/lib/cfi:ignorelist($host_toolchain)", - "//compiler-rt/lib/hwasan:hwasan_shared", "//compiler-rt/lib/hwasan:hwasan_preinit", + "//compiler-rt/lib/hwasan:hwasan_shared", "//compiler-rt/test:lit_common_configured", "//llvm/utils/FileCheck($host_toolchain)", "//llvm/utils/llvm-lit($host_toolchain)", diff --git a/llvm/utils/gn/secondary/compiler-rt/test/lsan/BUILD.gn b/llvm/utils/gn/secondary/compiler-rt/test/lsan/BUILD.gn index 4fb375f06caae..7dc69af57124c 100644 --- a/llvm/utils/gn/secondary/compiler-rt/test/lsan/BUILD.gn +++ b/llvm/utils/gn/secondary/compiler-rt/test/lsan/BUILD.gn @@ -7,11 +7,13 @@ import("//llvm/version.gni") write_cmake_config("asan_mode_cfg") { input = "lit.site.cfg.py.in" - output = "$target_gen_dir/${crt_current_target_arch}AsanConfig/lit.site.cfg.py" + output = + "$target_gen_dir/${crt_current_target_arch}AsanConfig/lit.site.cfg.py" values = [ "LSAN_LIT_SOURCE_DIR=" + rebase_path("."), "LSAN_TEST_CONFIG_SUFFIX=$crt_current_target_suffix", "LSAN_TEST_TARGET_CFLAGS=$target_flags_string", + # TODO(lgrey): Support standalone mode "LSAN_LIT_TEST_MODE=AddressSanitizer", "LSAN_TEST_TARGET_ARCH=$crt_current_target_arch", @@ -59,9 +61,7 @@ if (supported_toolchains != []) { test_dir = rebase_path( get_label_info(":lit_site_cfg($toolchain)", "target_gen_dir"), root_build_dir) - args += [ - test_dir + "/${crt_current_target_arch}AsanConfig", - ] + args += [ test_dir + "/${crt_current_target_arch}AsanConfig" ] } outputs = [ "$target_gen_dir/run-lit" ] # Non-existing, so that ninja runs # it each time. diff --git a/llvm/utils/gn/secondary/libcxx/src/BUILD.gn b/llvm/utils/gn/secondary/libcxx/src/BUILD.gn index a94674a61873d..29e864957a5e1 100644 --- a/llvm/utils/gn/secondary/libcxx/src/BUILD.gn +++ b/llvm/utils/gn/secondary/libcxx/src/BUILD.gn @@ -317,13 +317,13 @@ if (libcxx_enable_experimental) { sources = [ "experimental/keep.cpp" ] if (libcxx_enable_filesystem && libcxx_enable_time_zone_database) { sources += [ + # TODO TZDB The exception could be moved in chrono once the TZDB library + # is no longer experimental. + "experimental/chrono_exception.cpp", "experimental/include/tzdb/time_zone_private.h", "experimental/include/tzdb/types_private.h", "experimental/include/tzdb/tzdb_list_private.h", "experimental/include/tzdb/tzdb_private.h", - # TODO TZDB The exception could be moved in chrono once the TZDB library - # is no longer experimental. 
- "experimental/chrono_exception.cpp", "experimental/time_zone.cpp", "experimental/tzdb.cpp", "experimental/tzdb_list.cpp", diff --git a/llvm/utils/gn/secondary/lld/unittests/AsLibAll/BUILD.gn b/llvm/utils/gn/secondary/lld/unittests/AsLibAll/BUILD.gn index d6af6a1c73792..6eb82ea61a1b0 100644 --- a/llvm/utils/gn/secondary/lld/unittests/AsLibAll/BUILD.gn +++ b/llvm/utils/gn/secondary/lld/unittests/AsLibAll/BUILD.gn @@ -3,8 +3,8 @@ import("//third-party/unittest/unittest.gni") unittest("LLDAsLibAllTests") { configs += [ "//llvm/utils/gn/build:lld_code" ] deps = [ - "//lld/Common", "//lld/COFF", + "//lld/Common", "//lld/ELF", "//lld/MachO", "//lld/MinGW", diff --git a/llvm/utils/gn/secondary/lld/unittests/BUILD.gn b/llvm/utils/gn/secondary/lld/unittests/BUILD.gn index c909670f4b1ba..6faaa12faad0d 100644 --- a/llvm/utils/gn/secondary/lld/unittests/BUILD.gn +++ b/llvm/utils/gn/secondary/lld/unittests/BUILD.gn @@ -5,4 +5,3 @@ group("unittests") { ] testonly = true } - diff --git a/llvm/utils/gn/secondary/lldb/test/BUILD.gn b/llvm/utils/gn/secondary/lldb/test/BUILD.gn index e903d16e338c9..369b24f97d7b1 100644 --- a/llvm/utils/gn/secondary/lldb/test/BUILD.gn +++ b/llvm/utils/gn/secondary/lldb/test/BUILD.gn @@ -164,8 +164,8 @@ group("test") { ":lit_unit_site_cfg", "//clang/tools/driver:symlinks", "//lld/tools/lld:symlinks", - "//lldb/tools/lldb-dap", "//lldb/tools/driver:lldb", + "//lldb/tools/lldb-dap", # XXX lldb-instr, darwin-debug, etc "//lldb/tools/lldb-server", diff --git a/llvm/utils/gn/secondary/llvm/include/llvm/TargetParser/BUILD.gn b/llvm/utils/gn/secondary/llvm/include/llvm/TargetParser/BUILD.gn index a71dfa518b1df..455a8265fce87 100644 --- a/llvm/utils/gn/secondary/llvm/include/llvm/TargetParser/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/include/llvm/TargetParser/BUILD.gn @@ -23,8 +23,8 @@ tablegen("RISCVTargetParserDef") { group("gen") { deps = [ - ":ARMTargetParserDef", ":AArch64TargetParserDef", + ":ARMTargetParserDef", ":RISCVTargetParserDef", ] } diff --git a/llvm/utils/gn/secondary/llvm/lib/CodeGenTypes/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/CodeGenTypes/BUILD.gn index 5df31c33a3ad0..04f819d36d581 100644 --- a/llvm/utils/gn/secondary/llvm/lib/CodeGenTypes/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/lib/CodeGenTypes/BUILD.gn @@ -10,4 +10,3 @@ static_library("CodeGenTypes") { ] sources = [ "LowLevelType.cpp" ] } - diff --git a/llvm/utils/gn/secondary/llvm/lib/DebugInfo/BTF/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/DebugInfo/BTF/BUILD.gn index 74c1362c697b9..803dd867199de 100644 --- a/llvm/utils/gn/secondary/llvm/lib/DebugInfo/BTF/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/lib/DebugInfo/BTF/BUILD.gn @@ -2,7 +2,7 @@ static_library("BTF") { output_name = "LLVMDebugInfoBTF" deps = [ "//llvm/lib/Support" ] sources = [ - "BTFParser.cpp", "BTFContext.cpp", + "BTFParser.cpp", ] } diff --git a/llvm/utils/gn/secondary/llvm/lib/Target/WebAssembly/Utils/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Target/WebAssembly/Utils/BUILD.gn index a4a6889dcb4a5..1fba8640a2a2b 100644 --- a/llvm/utils/gn/secondary/llvm/lib/Target/WebAssembly/Utils/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/lib/Target/WebAssembly/Utils/BUILD.gn @@ -12,7 +12,5 @@ static_library("Utils") { "//llvm/lib/Target/WebAssembly/TargetInfo", ] include_dirs = [ ".." 
] - sources = [ - "WebAssemblyTypeUtilities.cpp", - ] + sources = [ "WebAssemblyTypeUtilities.cpp" ] } diff --git a/llvm/utils/gn/secondary/llvm/tools/llc/BUILD.gn b/llvm/utils/gn/secondary/llvm/tools/llc/BUILD.gn index a968760e1c2a3..8756ee512d01b 100644 --- a/llvm/utils/gn/secondary/llvm/tools/llc/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/tools/llc/BUILD.gn @@ -6,8 +6,8 @@ executable("llc") { "//llvm/lib/CodeGen/MIRParser", "//llvm/lib/CodeGen/SelectionDAG", "//llvm/lib/IR", - "//llvm/lib/IRReader", "//llvm/lib/IRPrinter", + "//llvm/lib/IRReader", "//llvm/lib/MC", "//llvm/lib/Passes", "//llvm/lib/Support", diff --git a/llvm/utils/gn/secondary/llvm/tools/llvm-dwp/BUILD.gn b/llvm/utils/gn/secondary/llvm/tools/llvm-dwp/BUILD.gn index 01f8d0f134dd2..49cccaa5b215c 100644 --- a/llvm/utils/gn/secondary/llvm/tools/llvm-dwp/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/tools/llvm-dwp/BUILD.gn @@ -1,7 +1,7 @@ import("//llvm/tools/binutils_symlinks.gni") +import("//llvm/utils/TableGen/tablegen.gni") import("//llvm/utils/gn/build/driver_executable.gni") import("//llvm/utils/gn/build/symlink_or_copy.gni") -import("//llvm/utils/TableGen/tablegen.gni") tablegen("Opts") { visibility = [ ":llvm-dwp" ] diff --git a/llvm/utils/gn/secondary/llvm/tools/llvm-libtool-darwin/BUILD.gn b/llvm/utils/gn/secondary/llvm/tools/llvm-libtool-darwin/BUILD.gn index c974cae267371..034201adf779e 100644 --- a/llvm/utils/gn/secondary/llvm/tools/llvm-libtool-darwin/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/tools/llvm-libtool-darwin/BUILD.gn @@ -1,7 +1,7 @@ import("//llvm/tools/cctools_symlinks.gni") +import("//llvm/utils/TableGen/tablegen.gni") import("//llvm/utils/gn/build/driver_executable.gni") import("//llvm/utils/gn/build/symlink_or_copy.gni") -import("//llvm/utils/TableGen/tablegen.gni") tablegen("Opts") { visibility = [ ":llvm-libtool-darwin" ] diff --git a/llvm/utils/gn/secondary/llvm/tools/llvm-ml/BUILD.gn b/llvm/utils/gn/secondary/llvm/tools/llvm-ml/BUILD.gn index b094f0e59bcce..9e3fb96861dbd 100644 --- a/llvm/utils/gn/secondary/llvm/tools/llvm-ml/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/tools/llvm-ml/BUILD.gn @@ -1,5 +1,5 @@ -import("//llvm/utils/gn/build/driver_executable.gni") import("//llvm/utils/TableGen/tablegen.gni") +import("//llvm/utils/gn/build/driver_executable.gni") tablegen("Opts") { visibility = [ ":llvm-ml" ] diff --git a/llvm/utils/gn/secondary/llvm/tools/sancov/BUILD.gn b/llvm/utils/gn/secondary/llvm/tools/sancov/BUILD.gn index ff0fd700c911a..9057072f3c095 100644 --- a/llvm/utils/gn/secondary/llvm/tools/sancov/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/tools/sancov/BUILD.gn @@ -1,5 +1,5 @@ -import("//llvm/utils/gn/build/driver_executable.gni") import("//llvm/utils/TableGen/tablegen.gni") +import("//llvm/utils/gn/build/driver_executable.gni") tablegen("Opts") { visibility = [ ":sancov" ] diff --git a/llvm/utils/gn/secondary/llvm/unittests/Transforms/Instrumentation/BUILD.gn b/llvm/utils/gn/secondary/llvm/unittests/Transforms/Instrumentation/BUILD.gn index 27ff75b18f431..c9c59acda22ac 100644 --- a/llvm/utils/gn/secondary/llvm/unittests/Transforms/Instrumentation/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/unittests/Transforms/Instrumentation/BUILD.gn @@ -5,10 +5,10 @@ unittest("InstrumentationTests") { "//llvm/lib/Analysis", "//llvm/lib/AsmParser", "//llvm/lib/IR", - "//llvm/lib/Transforms/Instrumentation", "//llvm/lib/Passes", "//llvm/lib/Support", "//llvm/lib/Testing/Support", + "//llvm/lib/Transforms/Instrumentation", ] sources = [ "PGOInstrumentationTest.cpp" ] } diff --git 
a/llvm/utils/gn/secondary/llvm/utils/TableGen/BUILD.gn b/llvm/utils/gn/secondary/llvm/utils/TableGen/BUILD.gn
index e93250d7f0b7c..ba52a97f39d85 100644
--- a/llvm/utils/gn/secondary/llvm/utils/TableGen/BUILD.gn
+++ b/llvm/utils/gn/secondary/llvm/utils/TableGen/BUILD.gn
@@ -64,9 +64,9 @@ executable("llvm-tblgen") {
     "SearchableTableEmitter.cpp",
     "SubtargetEmitter.cpp",
     "WebAssemblyDisassemblerEmitter.cpp",
-    "X86InstrMappingEmitter.cpp",
     "X86DisassemblerTables.cpp",
     "X86FoldTablesEmitter.cpp",
+    "X86InstrMappingEmitter.cpp",
    "X86MnemonicTables.cpp",
     "X86ModRMFilters.cpp",
     "X86RecognizableInstr.cpp",

From e33e087a175567e88b361fd198536314a0a1fff2 Mon Sep 17 00:00:00 2001
From: "Ruiling, Song"
Date: Wed, 25 Sep 2024 08:42:40 +0800
Subject: [PATCH 071/212] [MachineSink] Update register dependency correctly
 (#109763)

The call to accumulateUsedDefed() was missing when the block prologue
interference check did not pass. This left the register dependency
information incorrect, which in turn caused incorrect sinking.
---
 llvm/lib/CodeGen/MachineSink.cpp              |  5 +-
 .../AMDGPU/postra-sink-update-dependency.mir  | 66 +++++++++++++++++++
 2 files changed, 69 insertions(+), 2 deletions(-)
 create mode 100644 llvm/test/CodeGen/AMDGPU/postra-sink-update-dependency.mir

diff --git a/llvm/lib/CodeGen/MachineSink.cpp b/llvm/lib/CodeGen/MachineSink.cpp
index 609f9af9767f5..658ebd47488c7 100644
--- a/llvm/lib/CodeGen/MachineSink.cpp
+++ b/llvm/lib/CodeGen/MachineSink.cpp
@@ -2152,8 +2152,9 @@ bool PostRAMachineSinking::tryToSinkCopy(MachineBasicBlock &CurBB,
       MachineBasicBlock::iterator InsertPos =
           SuccBB->SkipPHIsAndLabels(SuccBB->begin());
       if (blockPrologueInterferes(SuccBB, InsertPos, MI, TRI, TII, nullptr)) {
-        LLVM_DEBUG(
-            dbgs() << " *** Not sinking: prologue interference\n");
+        LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits,
+                                          TRI);
+        LLVM_DEBUG(dbgs() << " *** Not sinking: prologue interference\n");
         continue;
       }

diff --git a/llvm/test/CodeGen/AMDGPU/postra-sink-update-dependency.mir b/llvm/test/CodeGen/AMDGPU/postra-sink-update-dependency.mir
new file mode 100644
index 0000000000000..14617e066f954
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/postra-sink-update-dependency.mir
@@ -0,0 +1,66 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -run-pass=postra-machine-sink -verify-machineinstrs -o - %s | FileCheck %s
+#
+# In the example, the `$sgpr4 = COPY $sgpr2` was incorrectly sunk into bb.3. This happened because we did not update
+# the register uses and defs when we found that `$sgpr2 = COPY $sgpr3` should not be sunk because of a conflict with
+# the successor's prologue instructions.
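+#
+# With the fix, the copy that stays put still records its def of $sgpr2 (and
+# use of $sgpr3) via accumulateUsedDefed(), so the bottom-up walk no longer
+# treats the earlier `$sgpr4 = COPY $sgpr2` as safe to sink across it.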
+--- +name: update_dependency_correctly +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: update_dependency_correctly + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) + ; CHECK-NEXT: liveins: $sgpr0, $sgpr3, $sgpr2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $vgpr1 = IMPLICIT_DEF + ; CHECK-NEXT: renamable $sgpr4 = COPY $sgpr2 + ; CHECK-NEXT: renamable $sgpr2 = COPY $sgpr3 + ; CHECK-NEXT: $vgpr1 = SI_SPILL_S32_TO_VGPR $sgpr0, 0, $vgpr1 + ; CHECK-NEXT: $sgpr1 = S_AND_SAVEEXEC_B32 $sgpr0, implicit-def $exec, implicit-def $scc, implicit $exec + ; CHECK-NEXT: S_CBRANCH_EXECZ %bb.1, implicit $exec + ; CHECK-NEXT: S_BRANCH %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: S_ENDPGM 0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: liveins: $sgpr0, $sgpr2, $sgpr4, $vgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $sgpr3 = SI_RESTORE_S32_FROM_VGPR $vgpr1, 0 + ; CHECK-NEXT: renamable $sgpr0_sgpr1 = S_GETPC_B64_pseudo + ; CHECK-NEXT: renamable $sgpr5 = COPY $sgpr1 + ; CHECK-NEXT: renamable $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM renamable $sgpr4_sgpr5, 32, 0 + ; CHECK-NEXT: S_BRANCH %bb.1 + bb.0: + successors: %bb.3(0x40000000), %bb.2(0x40000000) + liveins: $sgpr0, $sgpr3, $sgpr2 + + $vgpr1 = IMPLICIT_DEF + + renamable $sgpr4 = COPY $sgpr2 + renamable $sgpr2 = COPY $sgpr3 + + $vgpr1 = SI_SPILL_S32_TO_VGPR $sgpr0, 0, $vgpr1 + + $sgpr1 = S_AND_SAVEEXEC_B32 $sgpr0, implicit-def $exec, implicit-def $scc, implicit $exec + S_CBRANCH_EXECZ %bb.2, implicit $exec + S_BRANCH %bb.3 + + bb.2: + S_ENDPGM 0 + + bb.3: + successors: %bb.2(0x40000000) + liveins: $sgpr0, $sgpr2, $sgpr4, $vgpr1 + + $sgpr3 = SI_RESTORE_S32_FROM_VGPR $vgpr1, 0 + + renamable $sgpr0_sgpr1 = S_GETPC_B64_pseudo + renamable $sgpr5 = COPY $sgpr1 + renamable $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM renamable $sgpr4_sgpr5, 32, 0 + + S_BRANCH %bb.2 + +... From 2495130c1b5c255bbcf5ff84555026649c5fa697 Mon Sep 17 00:00:00 2001 From: beetrees Date: Wed, 25 Sep 2024 01:51:13 +0100 Subject: [PATCH 072/212] [compiler-rt] Add missing carry to 128x128->256 wide multiply (#97257) --- compiler-rt/lib/builtins/fp_lib.h | 5 ++++- compiler-rt/test/builtins/Unit/multf3_test.c | 6 ++++++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/compiler-rt/lib/builtins/fp_lib.h b/compiler-rt/lib/builtins/fp_lib.h index 0289cfd10db66..fae58497a8f80 100644 --- a/compiler-rt/lib/builtins/fp_lib.h +++ b/compiler-rt/lib/builtins/fp_lib.h @@ -171,8 +171,11 @@ static __inline void wideMultiply(rep_t a, rep_t b, rep_t *hi, rep_t *lo) { (sum2 & Word_FullMask) + ((sum3 << 32) & Word_HiMask); *lo = r0 + (r1 << 64); + // The addition above can overflow, in which case `*lo` will be less than + // `r0`. Carry any overflow into `hi`. 
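+  // (For unsigned operands, `a + b` wrapped if and only if the result is
+  // smaller than either addend, so a single compare detects the carry.)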
+ const bool carry = *lo < r0; *hi = (r1 >> 64) + (sum1 >> 96) + (sum2 >> 64) + (sum3 >> 32) + sum4 + - (sum5 << 32) + (sum6 << 64); + (sum5 << 32) + (sum6 << 64) + carry; } #undef Word_1 #undef Word_2 diff --git a/compiler-rt/test/builtins/Unit/multf3_test.c b/compiler-rt/test/builtins/Unit/multf3_test.c index 543b55899ce82..0e561551d3534 100644 --- a/compiler-rt/test/builtins/Unit/multf3_test.c +++ b/compiler-rt/test/builtins/Unit/multf3_test.c @@ -77,6 +77,12 @@ int main() UINT64_C(0x0), UINT64_C(0x0))) return 1; + // test carry between lo and hi in widening multiply + if (test__multf3(0x0.7fffffffffffffffffffffffffffp-16382L, + 0x1.7fffffffffffffffffffffffffffp+1L, + UINT64_C(0x00017fffffffffff), + UINT64_C(0xfffffffffffffffc))) + return 1; #else printf("skipped\n"); From 4fc08b6cd57bda1d3e28eae283f7b20f8ce463d1 Mon Sep 17 00:00:00 2001 From: NAKAMURA Takumi Date: Wed, 25 Sep 2024 09:52:22 +0900 Subject: [PATCH 073/212] Revert "[AMDGPU][GlobalIsel] Use isRegisterClassType for G_FREEZE and G_IMPLICIT_DEF (#101331)" This reverts commit 63b2595846b86b4e4eb9afba5e97dd64e8135c10. (llvmorg-20-init-6782-g63b2595846b8) A few bots have been failing on `inst-select-unmerge-values.mir` --- .../lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp | 8 +- .../GlobalISel/inst-select-unmerge-values.mir | 18 +- .../AMDGPU/GlobalISel/legalize-freeze.mir | 30 +- .../GlobalISel/legalize-implicit-def.mir | 28 +- .../GlobalISel/legalize-insert-vector-elt.mir | 14 +- .../AMDGPU/GlobalISel/legalize-phi.mir | 152 +- .../AMDGPU/GlobalISel/regbankselect.mir | 19 + llvm/test/CodeGen/AMDGPU/freeze.ll | 1856 ----------------- 8 files changed, 157 insertions(+), 1968 deletions(-) delete mode 100644 llvm/test/CodeGen/AMDGPU/freeze.ll diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp index 2ebb3798b03ab..e657f668cc656 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -292,7 +292,6 @@ static const LLT S160 = LLT::scalar(160); static const LLT S224 = LLT::scalar(224); static const LLT S256 = LLT::scalar(256); static const LLT S512 = LLT::scalar(512); -static const LLT S1024 = LLT::scalar(1024); static const LLT MaxScalar = LLT::scalar(MaxRegisterSize); static const LLT V2S8 = LLT::fixed_vector(2, 8); @@ -333,8 +332,8 @@ static const LLT V16S64 = LLT::fixed_vector(16, 64); static const LLT V2S128 = LLT::fixed_vector(2, 128); static const LLT V4S128 = LLT::fixed_vector(4, 128); -static std::initializer_list AllScalarTypes = { - S32, S64, S96, S128, S160, S224, S256, S512, S1024}; +static std::initializer_list AllScalarTypes = {S32, S64, S96, S128, + S160, S224, S256, S512}; static std::initializer_list AllS16Vectors{ V2S16, V4S16, V6S16, V8S16, V10S16, V12S16, V16S16, V2S128, V4S128}; @@ -890,11 +889,10 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_, .clampScalar(0, S16, S64); getActionDefinitionsBuilder({G_IMPLICIT_DEF, G_FREEZE}) - .legalIf(isRegisterClassType(0)) + .legalIf(isRegisterType(0)) // s1 and s16 are special cases because they have legal operations on // them, but don't really occupy registers in the normal way. 
.legalFor({S1, S16}) - .clampNumElements(0, V16S32, V32S32) .moreElementsIf(isSmallOddVector(0), oneMoreElement(0)) .clampScalarOrElt(0, S32, MaxScalar) .widenScalarToNextPow2(0, 32) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-unmerge-values.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-unmerge-values.mir index 837f65d4bdec6..bec5f646b7839 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-unmerge-values.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-unmerge-values.mir @@ -171,9 +171,11 @@ body: | ; GCN-LABEL: name: test_unmerge_values_s_s64_s_s64_s64_s_s192 ; GCN: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[DEF:%[0-9]+]]:sgpr(s192) = G_IMPLICIT_DEF - ; GCN-NEXT: [[UV:%[0-9]+]]:sgpr(s64), [[UV1:%[0-9]+]]:sgpr(s64), [[UV2:%[0-9]+]]:sgpr(s64) = G_UNMERGE_VALUES [[DEF]](s192) - ; GCN-NEXT: S_ENDPGM 0, implicit [[UV]](s64), implicit [[UV1]](s64), implicit [[UV2]](s64) + ; GCN-NEXT: [[DEF:%[0-9]+]]:sgpr_192 = IMPLICIT_DEF + ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY [[DEF]].sub0_sub1 + ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY [[DEF]].sub2_sub3 + ; GCN-NEXT: [[COPY2:%[0-9]+]]:sreg_64 = COPY [[DEF]].sub4_sub5 + ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]], implicit [[COPY1]], implicit [[COPY2]] %0:sgpr(s192) = G_IMPLICIT_DEF %1:sgpr(s64), %2:sgpr(s64), %3:sgpr(s64) = G_UNMERGE_VALUES %0 S_ENDPGM 0, implicit %1, implicit %2, implicit %3 @@ -292,11 +294,11 @@ body: | ; GCN-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:sgpr_384(<12 x s32>) = G_CONCAT_VECTORS [[COPY]](<3 x s32>), [[COPY1]](<3 x s32>), [[COPY2]](<3 x s32>), [[COPY3]](<3 x s32>) ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_96(<3 x s32>) = COPY [[CONCAT_VECTORS]].sub0_sub1_sub2(<12 x s32>) ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_96(<3 x s32>) = COPY [[CONCAT_VECTORS]].sub3_sub4_sub5(<12 x s32>) - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_96(<3 x s32>), [[COPY5:%[0-9]+]]:sgpr_96(<3 x s32>), [[UV:%[0-9]+]]:sgpr_96(<3 x s32>), [[UV1:%[0-9]+]]:sgpr_96(<3 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s32>) - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2 = COPY [[COPY4]](<3 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5_sgpr6 = COPY [[COPY5]](<3 x s32>) - ; GCN-NEXT: $sgpr8_sgpr9_sgpr10 = COPY [[UV]](<3 x s32>) - ; GCN-NEXT: $sgpr12_sgpr13_sgpr14 = COPY [[UV1]](<3 x s32>) + ; GCN-NEXT: [[UV:%[0-9]+]]:sgpr_96(<3 x s32>), [[UV1:%[0-9]+]]:sgpr_96(<3 x s32>), [[UV2:%[0-9]+]]:sgpr_96(<3 x s32>), [[UV3:%[0-9]+]]:sgpr_96(<3 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s32>) + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2 = COPY [[UV]](<3 x s32>) + ; GCN-NEXT: $sgpr4_sgpr5_sgpr6 = COPY [[UV1]](<3 x s32>) + ; GCN-NEXT: $sgpr8_sgpr9_sgpr10 = COPY [[UV2]](<3 x s32>) + ; GCN-NEXT: $sgpr12_sgpr13_sgpr14 = COPY [[UV3]](<3 x s32>) %0:sgpr(<3 x s32>) = COPY $sgpr0_sgpr1_sgpr2 %1:sgpr(<3 x s32>) = COPY $sgpr4_sgpr5_sgpr6 %2:sgpr(<3 x s32>) = COPY $sgpr8_sgpr9_sgpr10 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-freeze.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-freeze.mir index b08f850b5b2b1..c06df6312c9c5 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-freeze.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-freeze.mir @@ -171,8 +171,12 @@ body: | ; CHECK-LABEL: name: test_freeze_s448 ; CHECK: [[COPY:%[0-9]+]]:_(s512) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s512) = G_FREEZE [[COPY]] - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY 
[[FREEZE]](s512) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s448) = G_TRUNC [[COPY]](s512) + ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s448) = G_FREEZE [[TRUNC]] + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64), [[UV6:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[FREEZE]](s448) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s512) = G_MERGE_VALUES [[UV]](s64), [[UV1]](s64), [[UV2]](s64), [[UV3]](s64), [[UV4]](s64), [[UV5]](s64), [[UV6]](s64), [[DEF]](s64) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[MV]](s512) %0:_(s512) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 %1:_(s448) = G_TRUNC %0 %2:_(s448) = G_FREEZE %1 @@ -395,12 +399,14 @@ body: | bb.0: ; CHECK-LABEL: name: test_freeze_v33s32 - ; CHECK: [[DEF:%[0-9]+]]:_(<32 x s32>) = G_IMPLICIT_DEF + ; CHECK: [[DEF:%[0-9]+]]:_(<16 x s32>) = G_IMPLICIT_DEF ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(<32 x s32>) = G_FREEZE [[DEF]] - ; CHECK-NEXT: [[FREEZE1:%[0-9]+]]:_(s32) = G_FREEZE [[DEF1]] - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FREEZE]](<32 x s32>) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<33 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32), [[UV4]](s32), [[UV5]](s32), [[UV6]](s32), [[UV7]](s32), [[UV8]](s32), [[UV9]](s32), [[UV10]](s32), [[UV11]](s32), [[UV12]](s32), [[UV13]](s32), [[UV14]](s32), [[UV15]](s32), [[UV16]](s32), [[UV17]](s32), [[UV18]](s32), [[UV19]](s32), [[UV20]](s32), [[UV21]](s32), [[UV22]](s32), [[UV23]](s32), [[UV24]](s32), [[UV25]](s32), [[UV26]](s32), [[UV27]](s32), [[UV28]](s32), [[UV29]](s32), [[UV30]](s32), [[UV31]](s32), [[FREEZE1]](s32) + ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(<16 x s32>) = G_FREEZE [[DEF]] + ; CHECK-NEXT: [[FREEZE1:%[0-9]+]]:_(<16 x s32>) = G_FREEZE [[DEF]] + ; CHECK-NEXT: [[FREEZE2:%[0-9]+]]:_(s32) = G_FREEZE [[DEF1]] + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FREEZE]](<16 x s32>) + ; CHECK-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), 
[[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FREEZE1]](<16 x s32>) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<33 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32), [[UV4]](s32), [[UV5]](s32), [[UV6]](s32), [[UV7]](s32), [[UV8]](s32), [[UV9]](s32), [[UV10]](s32), [[UV11]](s32), [[UV12]](s32), [[UV13]](s32), [[UV14]](s32), [[UV15]](s32), [[UV16]](s32), [[UV17]](s32), [[UV18]](s32), [[UV19]](s32), [[UV20]](s32), [[UV21]](s32), [[UV22]](s32), [[UV23]](s32), [[UV24]](s32), [[UV25]](s32), [[UV26]](s32), [[UV27]](s32), [[UV28]](s32), [[UV29]](s32), [[UV30]](s32), [[UV31]](s32), [[FREEZE2]](s32) ; CHECK-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<33 x s32>) %0:_(<33 x s32>) = G_IMPLICIT_DEF %1:_(<33 x s32>) = G_FREEZE %0 @@ -413,10 +419,12 @@ body: | bb.0: ; CHECK-LABEL: name: test_freeze_v64s32 - ; CHECK: [[DEF:%[0-9]+]]:_(<32 x s32>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(<32 x s32>) = G_FREEZE [[DEF]] - ; CHECK-NEXT: [[FREEZE1:%[0-9]+]]:_(<32 x s32>) = G_FREEZE [[DEF]] - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<64 x s32>) = G_CONCAT_VECTORS [[FREEZE]](<32 x s32>), [[FREEZE1]](<32 x s32>) + ; CHECK: [[DEF:%[0-9]+]]:_(<16 x s32>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(<16 x s32>) = G_FREEZE [[DEF]] + ; CHECK-NEXT: [[FREEZE1:%[0-9]+]]:_(<16 x s32>) = G_FREEZE [[DEF]] + ; CHECK-NEXT: [[FREEZE2:%[0-9]+]]:_(<16 x s32>) = G_FREEZE [[DEF]] + ; CHECK-NEXT: [[FREEZE3:%[0-9]+]]:_(<16 x s32>) = G_FREEZE [[DEF]] + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<64 x s32>) = G_CONCAT_VECTORS [[FREEZE]](<16 x s32>), [[FREEZE1]](<16 x s32>), [[FREEZE2]](<16 x s32>), [[FREEZE3]](<16 x s32>) ; CHECK-NEXT: S_NOP 0, implicit [[CONCAT_VECTORS]](<64 x s32>) %0:_(<64 x s32>) = G_IMPLICIT_DEF %1:_(<64 x s32>) = G_FREEZE %0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-implicit-def.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-implicit-def.mir index 8113ebfa5362e..b9edfbfa6d0a9 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-implicit-def.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-implicit-def.mir @@ -135,9 +135,8 @@ body: | bb.0: ; CHECK-LABEL: name: test_implicit_def_s448 - ; CHECK: [[DEF:%[0-9]+]]:_(s512) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s448) = G_TRUNC [[DEF]](s512) - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[TRUNC]](s448), 0 + ; CHECK: [[DEF:%[0-9]+]]:_(s448) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[DEF]](s448), 0 ; CHECK-NEXT: $vgpr0 = COPY [[EXTRACT]](s32) %0:_(s448) = G_IMPLICIT_DEF %1:_(s32) = G_EXTRACT %0, 0 @@ -296,6 +295,18 @@ body: | $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %0 ... +--- +name: test_implicit_def_v17s32 +body: | + bb.0: + + ; CHECK-LABEL: name: test_implicit_def_v17s32 + ; CHECK: [[DEF:%[0-9]+]]:_(<17 x s32>) = G_IMPLICIT_DEF + ; CHECK-NEXT: S_NOP 0, implicit [[DEF]](<17 x s32>) + %0:_(<17 x s32>) = G_IMPLICIT_DEF + S_NOP 0, implicit %0 +... 
+ --- name: test_implicit_def_v32s32 body: | @@ -317,9 +328,9 @@ body: | ; CHECK-LABEL: name: test_implicit_def_v33s32 ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<32 x s32>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<16 x s32>) = G_IMPLICIT_DEF ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<32 x s32>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<16 x s32>) ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CHECK-NEXT: G_STORE [[UV]](s32), [[COPY]](p1) :: (volatile store (s32), addrspace 1) ; CHECK-NEXT: G_STORE [[DEF1]](s32), [[COPY]](p1) :: (volatile store (s32), addrspace 1) @@ -337,9 +348,10 @@ body: | bb.0: ; CHECK-LABEL: name: test_implicit_def_v64s32 - ; CHECK: [[DEF:%[0-9]+]]:_(<32 x s32>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<64 x s32>) = G_CONCAT_VECTORS [[DEF]](<32 x s32>), [[DEF]](<32 x s32>) - ; CHECK-NEXT: S_NOP 0, implicit [[CONCAT_VECTORS]](<64 x s32>), implicit [[DEF]](<32 x s32>) + ; CHECK: [[DEF:%[0-9]+]]:_(<16 x s32>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<64 x s32>) = G_CONCAT_VECTORS [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>) + ; CHECK-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<32 x s32>) = G_CONCAT_VECTORS [[DEF]](<16 x s32>), [[DEF]](<16 x s32>) + ; CHECK-NEXT: S_NOP 0, implicit [[CONCAT_VECTORS]](<64 x s32>), implicit [[CONCAT_VECTORS1]](<32 x s32>) %0:_(<64 x s32>) = G_IMPLICIT_DEF %1:_(<32 x s32>), %2:_(<32 x s32>) = G_UNMERGE_VALUES %0 S_NOP 0, implicit %0, implicit %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-insert-vector-elt.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-insert-vector-elt.mir index bebbf2a262256..b57dd396ae355 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-insert-vector-elt.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-insert-vector-elt.mir @@ -190,11 +190,13 @@ body: | ; CHECK-LABEL: name: insert_vector_elt_64_65_v64s32 ; CHECK: liveins: $sgpr0_sgpr1, $vgpr0_vgpr1, $vgpr2_vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<32 x s32>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<16 x s32>) = G_IMPLICIT_DEF ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p1) = COPY $vgpr2_vgpr3 - ; CHECK-NEXT: 
[[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>), [[UV2:%[0-9]+]]:_(<4 x s32>), [[UV3:%[0-9]+]]:_(<4 x s32>), [[UV4:%[0-9]+]]:_(<4 x s32>), [[UV5:%[0-9]+]]:_(<4 x s32>), [[UV6:%[0-9]+]]:_(<4 x s32>), [[UV7:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[DEF]](<32 x s32>) - ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(<4 x s32>), [[UV9:%[0-9]+]]:_(<4 x s32>), [[UV10:%[0-9]+]]:_(<4 x s32>), [[UV11:%[0-9]+]]:_(<4 x s32>), [[UV12:%[0-9]+]]:_(<4 x s32>), [[UV13:%[0-9]+]]:_(<4 x s32>), [[UV14:%[0-9]+]]:_(<4 x s32>), [[UV15:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[DEF]](<32 x s32>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>), [[UV2:%[0-9]+]]:_(<4 x s32>), [[UV3:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[DEF]](<16 x s32>) + ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(<4 x s32>), [[UV5:%[0-9]+]]:_(<4 x s32>), [[UV6:%[0-9]+]]:_(<4 x s32>), [[UV7:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[DEF]](<16 x s32>) + ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(<4 x s32>), [[UV9:%[0-9]+]]:_(<4 x s32>), [[UV10:%[0-9]+]]:_(<4 x s32>), [[UV11:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[DEF]](<16 x s32>) + ; CHECK-NEXT: [[UV12:%[0-9]+]]:_(<4 x s32>), [[UV13:%[0-9]+]]:_(<4 x s32>), [[UV14:%[0-9]+]]:_(<4 x s32>), [[UV15:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[DEF]](<16 x s32>) ; CHECK-NEXT: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) @@ -241,8 +243,10 @@ body: | ; CHECK-NEXT: [[C14:%[0-9]+]]:_(s64) = G_CONSTANT i64 240 ; CHECK-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C14]](s64) ; CHECK-NEXT: G_STORE [[UV15]](<4 x s32>), [[PTR_ADD14]](p1) :: (store (<4 x s32>) into unknown-address + 240, align 4, addrspace 1) - ; CHECK-NEXT: [[UV16:%[0-9]+]]:_(<4 x s32>), [[UV17:%[0-9]+]]:_(<4 x s32>), [[UV18:%[0-9]+]]:_(<4 x s32>), [[UV19:%[0-9]+]]:_(<4 x s32>), [[UV20:%[0-9]+]]:_(<4 x s32>), [[UV21:%[0-9]+]]:_(<4 x s32>), [[UV22:%[0-9]+]]:_(<4 x s32>), [[UV23:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[DEF]](<32 x s32>) - ; CHECK-NEXT: [[UV24:%[0-9]+]]:_(<4 x s32>), [[UV25:%[0-9]+]]:_(<4 x s32>), [[UV26:%[0-9]+]]:_(<4 x s32>), [[UV27:%[0-9]+]]:_(<4 x s32>), [[UV28:%[0-9]+]]:_(<4 x s32>), [[UV29:%[0-9]+]]:_(<4 x s32>), [[UV30:%[0-9]+]]:_(<4 x s32>), [[UV31:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[DEF]](<32 x s32>) + ; CHECK-NEXT: [[UV16:%[0-9]+]]:_(<4 x s32>), [[UV17:%[0-9]+]]:_(<4 x s32>), [[UV18:%[0-9]+]]:_(<4 x s32>), [[UV19:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[DEF]](<16 x s32>) + ; CHECK-NEXT: [[UV20:%[0-9]+]]:_(<4 x s32>), [[UV21:%[0-9]+]]:_(<4 x s32>), [[UV22:%[0-9]+]]:_(<4 x s32>), [[UV23:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[DEF]](<16 x s32>) + ; CHECK-NEXT: [[UV24:%[0-9]+]]:_(<4 x s32>), [[UV25:%[0-9]+]]:_(<4 x s32>), [[UV26:%[0-9]+]]:_(<4 x s32>), [[UV27:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[DEF]](<16 x s32>) + ; CHECK-NEXT: [[UV28:%[0-9]+]]:_(<4 x s32>), [[UV29:%[0-9]+]]:_(<4 x s32>), [[UV30:%[0-9]+]]:_(<4 x s32>), [[UV31:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[DEF]](<16 x s32>) ; CHECK-NEXT: G_STORE [[UV16]](<4 x s32>), [[COPY1]](p1) :: (store (<4 x s32>), align 4, addrspace 1) ; CHECK-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C]](s64) ; CHECK-NEXT: G_STORE [[UV17]](<4 x s32>), [[PTR_ADD15]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 4, addrspace 1) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-phi.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-phi.mir index 
d82e8328f26ec..00612d552a104 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-phi.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-phi.mir @@ -673,86 +673,88 @@ body: | ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<32 x s32>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<16 x s32>) = G_IMPLICIT_DEF ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr4 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[C]] - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<16 x s32>), [[UV1:%[0-9]+]]:_(<16 x s32>) = G_UNMERGE_VALUES [[DEF]](<32 x s32>) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<16 x s32>), [[UV3:%[0-9]+]]:_(<16 x s32>) = G_UNMERGE_VALUES [[DEF]](<32 x s32>) ; CHECK-NEXT: G_BRCOND [[ICMP]](s1), %bb.1 ; CHECK-NEXT: G_BR %bb.2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32), [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32), [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<32 x s32>) - ; CHECK-NEXT: [[UV36:%[0-9]+]]:_(s32), [[UV37:%[0-9]+]]:_(s32), [[UV38:%[0-9]+]]:_(s32), [[UV39:%[0-9]+]]:_(s32), [[UV40:%[0-9]+]]:_(s32), [[UV41:%[0-9]+]]:_(s32), [[UV42:%[0-9]+]]:_(s32), [[UV43:%[0-9]+]]:_(s32), [[UV44:%[0-9]+]]:_(s32), [[UV45:%[0-9]+]]:_(s32), [[UV46:%[0-9]+]]:_(s32), [[UV47:%[0-9]+]]:_(s32), [[UV48:%[0-9]+]]:_(s32), [[UV49:%[0-9]+]]:_(s32), [[UV50:%[0-9]+]]:_(s32), [[UV51:%[0-9]+]]:_(s32), [[UV52:%[0-9]+]]:_(s32), [[UV53:%[0-9]+]]:_(s32), [[UV54:%[0-9]+]]:_(s32), [[UV55:%[0-9]+]]:_(s32), [[UV56:%[0-9]+]]:_(s32), [[UV57:%[0-9]+]]:_(s32), [[UV58:%[0-9]+]]:_(s32), [[UV59:%[0-9]+]]:_(s32), [[UV60:%[0-9]+]]:_(s32), [[UV61:%[0-9]+]]:_(s32), [[UV62:%[0-9]+]]:_(s32), [[UV63:%[0-9]+]]:_(s32), [[UV64:%[0-9]+]]:_(s32), [[UV65:%[0-9]+]]:_(s32), [[UV66:%[0-9]+]]:_(s32), [[UV67:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<32 x s32>) - ; CHECK-NEXT: [[UV68:%[0-9]+]]:_(s32), [[UV69:%[0-9]+]]:_(s32), [[UV70:%[0-9]+]]:_(s32), [[UV71:%[0-9]+]]:_(s32), [[UV72:%[0-9]+]]:_(s32), [[UV73:%[0-9]+]]:_(s32), [[UV74:%[0-9]+]]:_(s32), [[UV75:%[0-9]+]]:_(s32), [[UV76:%[0-9]+]]:_(s32), [[UV77:%[0-9]+]]:_(s32), [[UV78:%[0-9]+]]:_(s32), [[UV79:%[0-9]+]]:_(s32), [[UV80:%[0-9]+]]:_(s32), [[UV81:%[0-9]+]]:_(s32), [[UV82:%[0-9]+]]:_(s32), [[UV83:%[0-9]+]]:_(s32), [[UV84:%[0-9]+]]:_(s32), [[UV85:%[0-9]+]]:_(s32), [[UV86:%[0-9]+]]:_(s32), [[UV87:%[0-9]+]]:_(s32), [[UV88:%[0-9]+]]:_(s32), [[UV89:%[0-9]+]]:_(s32), [[UV90:%[0-9]+]]:_(s32), [[UV91:%[0-9]+]]:_(s32), [[UV92:%[0-9]+]]:_(s32), [[UV93:%[0-9]+]]:_(s32), [[UV94:%[0-9]+]]:_(s32), [[UV95:%[0-9]+]]:_(s32), [[UV96:%[0-9]+]]:_(s32), [[UV97:%[0-9]+]]:_(s32), [[UV98:%[0-9]+]]:_(s32), 
[[UV99:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<32 x s32>) - ; CHECK-NEXT: [[UV100:%[0-9]+]]:_(s32), [[UV101:%[0-9]+]]:_(s32), [[UV102:%[0-9]+]]:_(s32), [[UV103:%[0-9]+]]:_(s32), [[UV104:%[0-9]+]]:_(s32), [[UV105:%[0-9]+]]:_(s32), [[UV106:%[0-9]+]]:_(s32), [[UV107:%[0-9]+]]:_(s32), [[UV108:%[0-9]+]]:_(s32), [[UV109:%[0-9]+]]:_(s32), [[UV110:%[0-9]+]]:_(s32), [[UV111:%[0-9]+]]:_(s32), [[UV112:%[0-9]+]]:_(s32), [[UV113:%[0-9]+]]:_(s32), [[UV114:%[0-9]+]]:_(s32), [[UV115:%[0-9]+]]:_(s32), [[UV116:%[0-9]+]]:_(s32), [[UV117:%[0-9]+]]:_(s32), [[UV118:%[0-9]+]]:_(s32), [[UV119:%[0-9]+]]:_(s32), [[UV120:%[0-9]+]]:_(s32), [[UV121:%[0-9]+]]:_(s32), [[UV122:%[0-9]+]]:_(s32), [[UV123:%[0-9]+]]:_(s32), [[UV124:%[0-9]+]]:_(s32), [[UV125:%[0-9]+]]:_(s32), [[UV126:%[0-9]+]]:_(s32), [[UV127:%[0-9]+]]:_(s32), [[UV128:%[0-9]+]]:_(s32), [[UV129:%[0-9]+]]:_(s32), [[UV130:%[0-9]+]]:_(s32), [[UV131:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<32 x s32>) - ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UV4]], [[UV68]] - ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UV5]], [[UV69]] - ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[UV6]], [[UV70]] - ; CHECK-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UV7]], [[UV71]] - ; CHECK-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[UV8]], [[UV72]] - ; CHECK-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UV9]], [[UV73]] - ; CHECK-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[UV10]], [[UV74]] - ; CHECK-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UV11]], [[UV75]] - ; CHECK-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[UV12]], [[UV76]] - ; CHECK-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[UV13]], [[UV77]] - ; CHECK-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[UV14]], [[UV78]] - ; CHECK-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UV15]], [[UV79]] - ; CHECK-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[UV16]], [[UV80]] - ; CHECK-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[UV17]], [[UV81]] - ; CHECK-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[UV18]], [[UV82]] - ; CHECK-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UV19]], [[UV83]] - ; CHECK-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[UV20]], [[UV84]] - ; CHECK-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[UV21]], [[UV85]] - ; CHECK-NEXT: [[ADD18:%[0-9]+]]:_(s32) = G_ADD [[UV22]], [[UV86]] - ; CHECK-NEXT: [[ADD19:%[0-9]+]]:_(s32) = G_ADD [[UV23]], [[UV87]] - ; CHECK-NEXT: [[ADD20:%[0-9]+]]:_(s32) = G_ADD [[UV24]], [[UV88]] - ; CHECK-NEXT: [[ADD21:%[0-9]+]]:_(s32) = G_ADD [[UV25]], [[UV89]] - ; CHECK-NEXT: [[ADD22:%[0-9]+]]:_(s32) = G_ADD [[UV26]], [[UV90]] - ; CHECK-NEXT: [[ADD23:%[0-9]+]]:_(s32) = G_ADD [[UV27]], [[UV91]] - ; CHECK-NEXT: [[ADD24:%[0-9]+]]:_(s32) = G_ADD [[UV28]], [[UV92]] - ; CHECK-NEXT: [[ADD25:%[0-9]+]]:_(s32) = G_ADD [[UV29]], [[UV93]] - ; CHECK-NEXT: [[ADD26:%[0-9]+]]:_(s32) = G_ADD [[UV30]], [[UV94]] - ; CHECK-NEXT: [[ADD27:%[0-9]+]]:_(s32) = G_ADD [[UV31]], [[UV95]] - ; CHECK-NEXT: [[ADD28:%[0-9]+]]:_(s32) = G_ADD [[UV32]], [[UV96]] - ; CHECK-NEXT: [[ADD29:%[0-9]+]]:_(s32) = G_ADD [[UV33]], [[UV97]] - ; CHECK-NEXT: [[ADD30:%[0-9]+]]:_(s32) = G_ADD [[UV34]], [[UV98]] - ; CHECK-NEXT: [[ADD31:%[0-9]+]]:_(s32) = G_ADD [[UV35]], [[UV99]] - ; CHECK-NEXT: [[ADD32:%[0-9]+]]:_(s32) = G_ADD [[UV36]], [[UV100]] - ; CHECK-NEXT: [[ADD33:%[0-9]+]]:_(s32) = G_ADD [[UV37]], [[UV101]] - ; CHECK-NEXT: [[ADD34:%[0-9]+]]:_(s32) = G_ADD [[UV38]], [[UV102]] - ; CHECK-NEXT: [[ADD35:%[0-9]+]]:_(s32) = G_ADD [[UV39]], [[UV103]] - ; CHECK-NEXT: [[ADD36:%[0-9]+]]:_(s32) = G_ADD [[UV40]], [[UV104]] - ; CHECK-NEXT: [[ADD37:%[0-9]+]]:_(s32) = G_ADD [[UV41]], [[UV105]] - ; CHECK-NEXT: 
[[ADD38:%[0-9]+]]:_(s32) = G_ADD [[UV42]], [[UV106]] - ; CHECK-NEXT: [[ADD39:%[0-9]+]]:_(s32) = G_ADD [[UV43]], [[UV107]] - ; CHECK-NEXT: [[ADD40:%[0-9]+]]:_(s32) = G_ADD [[UV44]], [[UV108]] - ; CHECK-NEXT: [[ADD41:%[0-9]+]]:_(s32) = G_ADD [[UV45]], [[UV109]] - ; CHECK-NEXT: [[ADD42:%[0-9]+]]:_(s32) = G_ADD [[UV46]], [[UV110]] - ; CHECK-NEXT: [[ADD43:%[0-9]+]]:_(s32) = G_ADD [[UV47]], [[UV111]] - ; CHECK-NEXT: [[ADD44:%[0-9]+]]:_(s32) = G_ADD [[UV48]], [[UV112]] - ; CHECK-NEXT: [[ADD45:%[0-9]+]]:_(s32) = G_ADD [[UV49]], [[UV113]] - ; CHECK-NEXT: [[ADD46:%[0-9]+]]:_(s32) = G_ADD [[UV50]], [[UV114]] - ; CHECK-NEXT: [[ADD47:%[0-9]+]]:_(s32) = G_ADD [[UV51]], [[UV115]] - ; CHECK-NEXT: [[ADD48:%[0-9]+]]:_(s32) = G_ADD [[UV52]], [[UV116]] - ; CHECK-NEXT: [[ADD49:%[0-9]+]]:_(s32) = G_ADD [[UV53]], [[UV117]] - ; CHECK-NEXT: [[ADD50:%[0-9]+]]:_(s32) = G_ADD [[UV54]], [[UV118]] - ; CHECK-NEXT: [[ADD51:%[0-9]+]]:_(s32) = G_ADD [[UV55]], [[UV119]] - ; CHECK-NEXT: [[ADD52:%[0-9]+]]:_(s32) = G_ADD [[UV56]], [[UV120]] - ; CHECK-NEXT: [[ADD53:%[0-9]+]]:_(s32) = G_ADD [[UV57]], [[UV121]] - ; CHECK-NEXT: [[ADD54:%[0-9]+]]:_(s32) = G_ADD [[UV58]], [[UV122]] - ; CHECK-NEXT: [[ADD55:%[0-9]+]]:_(s32) = G_ADD [[UV59]], [[UV123]] - ; CHECK-NEXT: [[ADD56:%[0-9]+]]:_(s32) = G_ADD [[UV60]], [[UV124]] - ; CHECK-NEXT: [[ADD57:%[0-9]+]]:_(s32) = G_ADD [[UV61]], [[UV125]] - ; CHECK-NEXT: [[ADD58:%[0-9]+]]:_(s32) = G_ADD [[UV62]], [[UV126]] - ; CHECK-NEXT: [[ADD59:%[0-9]+]]:_(s32) = G_ADD [[UV63]], [[UV127]] - ; CHECK-NEXT: [[ADD60:%[0-9]+]]:_(s32) = G_ADD [[UV64]], [[UV128]] - ; CHECK-NEXT: [[ADD61:%[0-9]+]]:_(s32) = G_ADD [[UV65]], [[UV129]] - ; CHECK-NEXT: [[ADD62:%[0-9]+]]:_(s32) = G_ADD [[UV66]], [[UV130]] - ; CHECK-NEXT: [[ADD63:%[0-9]+]]:_(s32) = G_ADD [[UV67]], [[UV131]] + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<16 x s32>) + ; CHECK-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<16 x s32>) + ; CHECK-NEXT: [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32), [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32), [[UV36:%[0-9]+]]:_(s32), [[UV37:%[0-9]+]]:_(s32), [[UV38:%[0-9]+]]:_(s32), [[UV39:%[0-9]+]]:_(s32), [[UV40:%[0-9]+]]:_(s32), [[UV41:%[0-9]+]]:_(s32), [[UV42:%[0-9]+]]:_(s32), [[UV43:%[0-9]+]]:_(s32), [[UV44:%[0-9]+]]:_(s32), [[UV45:%[0-9]+]]:_(s32), [[UV46:%[0-9]+]]:_(s32), [[UV47:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<16 x s32>) + ; CHECK-NEXT: [[UV48:%[0-9]+]]:_(s32), [[UV49:%[0-9]+]]:_(s32), [[UV50:%[0-9]+]]:_(s32), [[UV51:%[0-9]+]]:_(s32), [[UV52:%[0-9]+]]:_(s32), [[UV53:%[0-9]+]]:_(s32), [[UV54:%[0-9]+]]:_(s32), [[UV55:%[0-9]+]]:_(s32), [[UV56:%[0-9]+]]:_(s32), [[UV57:%[0-9]+]]:_(s32), [[UV58:%[0-9]+]]:_(s32), [[UV59:%[0-9]+]]:_(s32), [[UV60:%[0-9]+]]:_(s32), [[UV61:%[0-9]+]]:_(s32), [[UV62:%[0-9]+]]:_(s32), [[UV63:%[0-9]+]]:_(s32) = 
G_UNMERGE_VALUES [[DEF]](<16 x s32>) + ; CHECK-NEXT: [[UV64:%[0-9]+]]:_(s32), [[UV65:%[0-9]+]]:_(s32), [[UV66:%[0-9]+]]:_(s32), [[UV67:%[0-9]+]]:_(s32), [[UV68:%[0-9]+]]:_(s32), [[UV69:%[0-9]+]]:_(s32), [[UV70:%[0-9]+]]:_(s32), [[UV71:%[0-9]+]]:_(s32), [[UV72:%[0-9]+]]:_(s32), [[UV73:%[0-9]+]]:_(s32), [[UV74:%[0-9]+]]:_(s32), [[UV75:%[0-9]+]]:_(s32), [[UV76:%[0-9]+]]:_(s32), [[UV77:%[0-9]+]]:_(s32), [[UV78:%[0-9]+]]:_(s32), [[UV79:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<16 x s32>) + ; CHECK-NEXT: [[UV80:%[0-9]+]]:_(s32), [[UV81:%[0-9]+]]:_(s32), [[UV82:%[0-9]+]]:_(s32), [[UV83:%[0-9]+]]:_(s32), [[UV84:%[0-9]+]]:_(s32), [[UV85:%[0-9]+]]:_(s32), [[UV86:%[0-9]+]]:_(s32), [[UV87:%[0-9]+]]:_(s32), [[UV88:%[0-9]+]]:_(s32), [[UV89:%[0-9]+]]:_(s32), [[UV90:%[0-9]+]]:_(s32), [[UV91:%[0-9]+]]:_(s32), [[UV92:%[0-9]+]]:_(s32), [[UV93:%[0-9]+]]:_(s32), [[UV94:%[0-9]+]]:_(s32), [[UV95:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<16 x s32>) + ; CHECK-NEXT: [[UV96:%[0-9]+]]:_(s32), [[UV97:%[0-9]+]]:_(s32), [[UV98:%[0-9]+]]:_(s32), [[UV99:%[0-9]+]]:_(s32), [[UV100:%[0-9]+]]:_(s32), [[UV101:%[0-9]+]]:_(s32), [[UV102:%[0-9]+]]:_(s32), [[UV103:%[0-9]+]]:_(s32), [[UV104:%[0-9]+]]:_(s32), [[UV105:%[0-9]+]]:_(s32), [[UV106:%[0-9]+]]:_(s32), [[UV107:%[0-9]+]]:_(s32), [[UV108:%[0-9]+]]:_(s32), [[UV109:%[0-9]+]]:_(s32), [[UV110:%[0-9]+]]:_(s32), [[UV111:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<16 x s32>) + ; CHECK-NEXT: [[UV112:%[0-9]+]]:_(s32), [[UV113:%[0-9]+]]:_(s32), [[UV114:%[0-9]+]]:_(s32), [[UV115:%[0-9]+]]:_(s32), [[UV116:%[0-9]+]]:_(s32), [[UV117:%[0-9]+]]:_(s32), [[UV118:%[0-9]+]]:_(s32), [[UV119:%[0-9]+]]:_(s32), [[UV120:%[0-9]+]]:_(s32), [[UV121:%[0-9]+]]:_(s32), [[UV122:%[0-9]+]]:_(s32), [[UV123:%[0-9]+]]:_(s32), [[UV124:%[0-9]+]]:_(s32), [[UV125:%[0-9]+]]:_(s32), [[UV126:%[0-9]+]]:_(s32), [[UV127:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<16 x s32>) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UV]], [[UV64]] + ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UV1]], [[UV65]] + ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[UV2]], [[UV66]] + ; CHECK-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UV3]], [[UV67]] + ; CHECK-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[UV4]], [[UV68]] + ; CHECK-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UV5]], [[UV69]] + ; CHECK-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[UV6]], [[UV70]] + ; CHECK-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UV7]], [[UV71]] + ; CHECK-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[UV8]], [[UV72]] + ; CHECK-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[UV9]], [[UV73]] + ; CHECK-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[UV10]], [[UV74]] + ; CHECK-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UV11]], [[UV75]] + ; CHECK-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[UV12]], [[UV76]] + ; CHECK-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[UV13]], [[UV77]] + ; CHECK-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[UV14]], [[UV78]] + ; CHECK-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UV15]], [[UV79]] + ; CHECK-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[UV16]], [[UV80]] + ; CHECK-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[UV17]], [[UV81]] + ; CHECK-NEXT: [[ADD18:%[0-9]+]]:_(s32) = G_ADD [[UV18]], [[UV82]] + ; CHECK-NEXT: [[ADD19:%[0-9]+]]:_(s32) = G_ADD [[UV19]], [[UV83]] + ; CHECK-NEXT: [[ADD20:%[0-9]+]]:_(s32) = G_ADD [[UV20]], [[UV84]] + ; CHECK-NEXT: [[ADD21:%[0-9]+]]:_(s32) = G_ADD [[UV21]], [[UV85]] + ; CHECK-NEXT: [[ADD22:%[0-9]+]]:_(s32) = G_ADD [[UV22]], [[UV86]] + ; CHECK-NEXT: [[ADD23:%[0-9]+]]:_(s32) = G_ADD [[UV23]], [[UV87]] + ; CHECK-NEXT: [[ADD24:%[0-9]+]]:_(s32) = G_ADD [[UV24]], 
[[UV88]] + ; CHECK-NEXT: [[ADD25:%[0-9]+]]:_(s32) = G_ADD [[UV25]], [[UV89]] + ; CHECK-NEXT: [[ADD26:%[0-9]+]]:_(s32) = G_ADD [[UV26]], [[UV90]] + ; CHECK-NEXT: [[ADD27:%[0-9]+]]:_(s32) = G_ADD [[UV27]], [[UV91]] + ; CHECK-NEXT: [[ADD28:%[0-9]+]]:_(s32) = G_ADD [[UV28]], [[UV92]] + ; CHECK-NEXT: [[ADD29:%[0-9]+]]:_(s32) = G_ADD [[UV29]], [[UV93]] + ; CHECK-NEXT: [[ADD30:%[0-9]+]]:_(s32) = G_ADD [[UV30]], [[UV94]] + ; CHECK-NEXT: [[ADD31:%[0-9]+]]:_(s32) = G_ADD [[UV31]], [[UV95]] + ; CHECK-NEXT: [[ADD32:%[0-9]+]]:_(s32) = G_ADD [[UV32]], [[UV96]] + ; CHECK-NEXT: [[ADD33:%[0-9]+]]:_(s32) = G_ADD [[UV33]], [[UV97]] + ; CHECK-NEXT: [[ADD34:%[0-9]+]]:_(s32) = G_ADD [[UV34]], [[UV98]] + ; CHECK-NEXT: [[ADD35:%[0-9]+]]:_(s32) = G_ADD [[UV35]], [[UV99]] + ; CHECK-NEXT: [[ADD36:%[0-9]+]]:_(s32) = G_ADD [[UV36]], [[UV100]] + ; CHECK-NEXT: [[ADD37:%[0-9]+]]:_(s32) = G_ADD [[UV37]], [[UV101]] + ; CHECK-NEXT: [[ADD38:%[0-9]+]]:_(s32) = G_ADD [[UV38]], [[UV102]] + ; CHECK-NEXT: [[ADD39:%[0-9]+]]:_(s32) = G_ADD [[UV39]], [[UV103]] + ; CHECK-NEXT: [[ADD40:%[0-9]+]]:_(s32) = G_ADD [[UV40]], [[UV104]] + ; CHECK-NEXT: [[ADD41:%[0-9]+]]:_(s32) = G_ADD [[UV41]], [[UV105]] + ; CHECK-NEXT: [[ADD42:%[0-9]+]]:_(s32) = G_ADD [[UV42]], [[UV106]] + ; CHECK-NEXT: [[ADD43:%[0-9]+]]:_(s32) = G_ADD [[UV43]], [[UV107]] + ; CHECK-NEXT: [[ADD44:%[0-9]+]]:_(s32) = G_ADD [[UV44]], [[UV108]] + ; CHECK-NEXT: [[ADD45:%[0-9]+]]:_(s32) = G_ADD [[UV45]], [[UV109]] + ; CHECK-NEXT: [[ADD46:%[0-9]+]]:_(s32) = G_ADD [[UV46]], [[UV110]] + ; CHECK-NEXT: [[ADD47:%[0-9]+]]:_(s32) = G_ADD [[UV47]], [[UV111]] + ; CHECK-NEXT: [[ADD48:%[0-9]+]]:_(s32) = G_ADD [[UV48]], [[UV112]] + ; CHECK-NEXT: [[ADD49:%[0-9]+]]:_(s32) = G_ADD [[UV49]], [[UV113]] + ; CHECK-NEXT: [[ADD50:%[0-9]+]]:_(s32) = G_ADD [[UV50]], [[UV114]] + ; CHECK-NEXT: [[ADD51:%[0-9]+]]:_(s32) = G_ADD [[UV51]], [[UV115]] + ; CHECK-NEXT: [[ADD52:%[0-9]+]]:_(s32) = G_ADD [[UV52]], [[UV116]] + ; CHECK-NEXT: [[ADD53:%[0-9]+]]:_(s32) = G_ADD [[UV53]], [[UV117]] + ; CHECK-NEXT: [[ADD54:%[0-9]+]]:_(s32) = G_ADD [[UV54]], [[UV118]] + ; CHECK-NEXT: [[ADD55:%[0-9]+]]:_(s32) = G_ADD [[UV55]], [[UV119]] + ; CHECK-NEXT: [[ADD56:%[0-9]+]]:_(s32) = G_ADD [[UV56]], [[UV120]] + ; CHECK-NEXT: [[ADD57:%[0-9]+]]:_(s32) = G_ADD [[UV57]], [[UV121]] + ; CHECK-NEXT: [[ADD58:%[0-9]+]]:_(s32) = G_ADD [[UV58]], [[UV122]] + ; CHECK-NEXT: [[ADD59:%[0-9]+]]:_(s32) = G_ADD [[UV59]], [[UV123]] + ; CHECK-NEXT: [[ADD60:%[0-9]+]]:_(s32) = G_ADD [[UV60]], [[UV124]] + ; CHECK-NEXT: [[ADD61:%[0-9]+]]:_(s32) = G_ADD [[UV61]], [[UV125]] + ; CHECK-NEXT: [[ADD62:%[0-9]+]]:_(s32) = G_ADD [[UV62]], [[UV126]] + ; CHECK-NEXT: [[ADD63:%[0-9]+]]:_(s32) = G_ADD [[UV63]], [[UV127]] ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[ADD]](s32), [[ADD1]](s32), [[ADD2]](s32), [[ADD3]](s32), [[ADD4]](s32), [[ADD5]](s32), [[ADD6]](s32), [[ADD7]](s32), [[ADD8]](s32), [[ADD9]](s32), [[ADD10]](s32), [[ADD11]](s32), [[ADD12]](s32), [[ADD13]](s32), [[ADD14]](s32), [[ADD15]](s32) ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[ADD16]](s32), [[ADD17]](s32), [[ADD18]](s32), [[ADD19]](s32), [[ADD20]](s32), [[ADD21]](s32), [[ADD22]](s32), [[ADD23]](s32), [[ADD24]](s32), [[ADD25]](s32), [[ADD26]](s32), [[ADD27]](s32), [[ADD28]](s32), [[ADD29]](s32), [[ADD30]](s32), [[ADD31]](s32) ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[ADD32]](s32), [[ADD33]](s32), [[ADD34]](s32), [[ADD35]](s32), [[ADD36]](s32), [[ADD37]](s32), [[ADD38]](s32), [[ADD39]](s32), [[ADD40]](s32), 
[[ADD41]](s32), [[ADD42]](s32), [[ADD43]](s32), [[ADD44]](s32), [[ADD45]](s32), [[ADD46]](s32), [[ADD47]](s32) @@ -760,10 +762,10 @@ body: | ; CHECK-NEXT: G_BR %bb.2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: - ; CHECK-NEXT: [[PHI:%[0-9]+]]:_(<16 x s32>) = G_PHI [[UV]](<16 x s32>), %bb.0, [[BUILD_VECTOR]](<16 x s32>), %bb.1 - ; CHECK-NEXT: [[PHI1:%[0-9]+]]:_(<16 x s32>) = G_PHI [[UV1]](<16 x s32>), %bb.0, [[BUILD_VECTOR1]](<16 x s32>), %bb.1 - ; CHECK-NEXT: [[PHI2:%[0-9]+]]:_(<16 x s32>) = G_PHI [[UV2]](<16 x s32>), %bb.0, [[BUILD_VECTOR2]](<16 x s32>), %bb.1 - ; CHECK-NEXT: [[PHI3:%[0-9]+]]:_(<16 x s32>) = G_PHI [[UV3]](<16 x s32>), %bb.0, [[BUILD_VECTOR3]](<16 x s32>), %bb.1 + ; CHECK-NEXT: [[PHI:%[0-9]+]]:_(<16 x s32>) = G_PHI [[DEF]](<16 x s32>), %bb.0, [[BUILD_VECTOR]](<16 x s32>), %bb.1 + ; CHECK-NEXT: [[PHI1:%[0-9]+]]:_(<16 x s32>) = G_PHI [[DEF]](<16 x s32>), %bb.0, [[BUILD_VECTOR1]](<16 x s32>), %bb.1 + ; CHECK-NEXT: [[PHI2:%[0-9]+]]:_(<16 x s32>) = G_PHI [[DEF]](<16 x s32>), %bb.0, [[BUILD_VECTOR2]](<16 x s32>), %bb.1 + ; CHECK-NEXT: [[PHI3:%[0-9]+]]:_(<16 x s32>) = G_PHI [[DEF]](<16 x s32>), %bb.0, [[BUILD_VECTOR3]](<16 x s32>), %bb.1 ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<64 x s32>) = G_CONCAT_VECTORS [[PHI]](<16 x s32>), [[PHI1]](<16 x s32>), [[PHI2]](<16 x s32>), [[PHI3]](<16 x s32>) ; CHECK-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[CONCAT_VECTORS]](<64 x s32>) bb.0: diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect.mir index 1565986516860..c50187f594901 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect.mir @@ -42,6 +42,8 @@ ret void } + define void @non_power_of_2() { ret void } + define amdgpu_kernel void @load_constant_v4i16_from_8_align8(ptr addrspace(4) %ptr0) { ret void } @@ -184,6 +186,23 @@ body: | %1:_(s32) = G_LOAD %0 :: (load (s32) from %ir.tmp1) ... +--- +name: non_power_of_2 +legalized: true + +body: | + bb.0: + ; CHECK-LABEL: name: non_power_of_2 + ; CHECK: [[DEF:%[0-9]+]]:sgpr(s448) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:sgpr(s32) = G_EXTRACT [[DEF]](s448), 0 + ; CHECK-NEXT: $sgpr0 = COPY [[EXTRACT]](s32) + ; CHECK-NEXT: SI_RETURN_TO_EPILOG $sgpr0 + %0:_(s448) = G_IMPLICIT_DEF + %1:_(s32) = G_EXTRACT %0:_(s448), 0 + $sgpr0 = COPY %1:_(s32) + SI_RETURN_TO_EPILOG $sgpr0 +... 
+ --- name: load_constant_v4i16_from_8_align8 legalized: true diff --git a/llvm/test/CodeGen/AMDGPU/freeze.ll b/llvm/test/CodeGen/AMDGPU/freeze.ll deleted file mode 100644 index 22427ee344d91..0000000000000 --- a/llvm/test/CodeGen/AMDGPU/freeze.ll +++ /dev/null @@ -1,1856 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GFX10,GFX10-SDAG %s -; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GFX10,GFX10-GISEL %s -; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX11,GFX11-SDAG %s -; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX11,GFX11-GISEL %s - -define void @freeze_v2i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { -; GFX10-LABEL: freeze_v2i32: -; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: global_load_dwordx2 v[0:1], v[0:1], off -; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: global_store_dwordx2 v[2:3], v[0:1], off -; GFX10-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-LABEL: freeze_v2i32: -; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: global_load_b64 v[0:1], v[0:1], off -; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: global_store_b64 v[2:3], v[0:1], off -; GFX11-NEXT: s_setpc_b64 s[30:31] - %a = load <2 x i32>, ptr addrspace(1) %ptra, align 4 - %freeze = freeze <2 x i32> %a - store <2 x i32> %freeze, ptr addrspace(1) %ptrb, align 4 - ret void -} - -define void @freeze_v3i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { -; GFX10-LABEL: freeze_v3i32: -; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: global_load_dwordx3 v[4:6], v[0:1], off -; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: global_store_dwordx3 v[2:3], v[4:6], off -; GFX10-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-LABEL: freeze_v3i32: -; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: global_load_b96 v[4:6], v[0:1], off -; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: global_store_b96 v[2:3], v[4:6], off -; GFX11-NEXT: s_setpc_b64 s[30:31] - %a = load <3 x i32>, ptr addrspace(1) %ptra, align 4 - %freeze = freeze <3 x i32> %a - store <3 x i32> %freeze, ptr addrspace(1) %ptrb, align 4 - ret void -} - -define void @freeze_v4i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { -; GFX10-LABEL: freeze_v4i32: -; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: global_load_dwordx4 v[4:7], v[0:1], off -; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: global_store_dwordx4 v[2:3], v[4:7], off -; GFX10-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-LABEL: freeze_v4i32: -; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: global_load_b128 v[4:7], v[0:1], off -; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: global_store_b128 v[2:3], v[4:7], off -; GFX11-NEXT: s_setpc_b64 s[30:31] - %a = load <4 x i32>, ptr addrspace(1) %ptra, align 4 - %freeze = freeze <4 x i32> %a - store <4 x i32> %freeze, ptr addrspace(1) %ptrb, align 4 - ret void -} - -define void @freeze_v5i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { -; GFX10-SDAG-LABEL: freeze_v5i32: -; GFX10-SDAG: ; %bb.0: -; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SDAG-NEXT: s_clause 0x1 -; 
GFX10-SDAG-NEXT: global_load_dword v8, v[0:1], off offset:16 -; GFX10-SDAG-NEXT: global_load_dwordx4 v[4:7], v[0:1], off -; GFX10-SDAG-NEXT: s_waitcnt vmcnt(1) -; GFX10-SDAG-NEXT: global_store_dword v[2:3], v8, off offset:16 -; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) -; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[4:7], off -; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GFX10-GISEL-LABEL: freeze_v5i32: -; GFX10-GISEL: ; %bb.0: -; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: s_clause 0x1 -; GFX10-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off -; GFX10-GISEL-NEXT: global_load_dword v8, v[0:1], off offset:16 -; GFX10-GISEL-NEXT: s_waitcnt vmcnt(1) -; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off -; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) -; GFX10-GISEL-NEXT: global_store_dword v[2:3], v8, off offset:16 -; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-SDAG-LABEL: freeze_v5i32: -; GFX11-SDAG: ; %bb.0: -; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-NEXT: s_clause 0x1 -; GFX11-SDAG-NEXT: global_load_b32 v8, v[0:1], off offset:16 -; GFX11-SDAG-NEXT: global_load_b128 v[4:7], v[0:1], off -; GFX11-SDAG-NEXT: s_waitcnt vmcnt(1) -; GFX11-SDAG-NEXT: global_store_b32 v[2:3], v8, off offset:16 -; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) -; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[4:7], off -; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-GISEL-LABEL: freeze_v5i32: -; GFX11-GISEL: ; %bb.0: -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: s_clause 0x1 -; GFX11-GISEL-NEXT: global_load_b128 v[4:7], v[0:1], off -; GFX11-GISEL-NEXT: global_load_b32 v0, v[0:1], off offset:16 -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(1) -; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[4:7], off -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) -; GFX11-GISEL-NEXT: global_store_b32 v[2:3], v0, off offset:16 -; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] - %a = load <5 x i32>, ptr addrspace(1) %ptra, align 4 - %freeze = freeze <5 x i32> %a - store <5 x i32> %freeze, ptr addrspace(1) %ptrb, align 4 - ret void -} - -define void @freeze_v6i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { -; GFX10-SDAG-LABEL: freeze_v6i32: -; GFX10-SDAG: ; %bb.0: -; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SDAG-NEXT: s_clause 0x1 -; GFX10-SDAG-NEXT: global_load_dwordx2 v[8:9], v[0:1], off offset:16 -; GFX10-SDAG-NEXT: global_load_dwordx4 v[4:7], v[0:1], off -; GFX10-SDAG-NEXT: s_waitcnt vmcnt(1) -; GFX10-SDAG-NEXT: global_store_dwordx2 v[2:3], v[8:9], off offset:16 -; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) -; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[4:7], off -; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GFX10-GISEL-LABEL: freeze_v6i32: -; GFX10-GISEL: ; %bb.0: -; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: s_clause 0x1 -; GFX10-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off -; GFX10-GISEL-NEXT: global_load_dwordx2 v[8:9], v[0:1], off offset:16 -; GFX10-GISEL-NEXT: s_waitcnt vmcnt(1) -; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off -; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) -; GFX10-GISEL-NEXT: global_store_dwordx2 v[2:3], v[8:9], off offset:16 -; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-SDAG-LABEL: freeze_v6i32: -; GFX11-SDAG: ; %bb.0: -; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-NEXT: s_clause 0x1 -; GFX11-SDAG-NEXT: global_load_b64 v[8:9], v[0:1], off offset:16 -; GFX11-SDAG-NEXT: global_load_b128 v[4:7], v[0:1], off -; 
GFX11-SDAG-NEXT: s_waitcnt vmcnt(1) -; GFX11-SDAG-NEXT: global_store_b64 v[2:3], v[8:9], off offset:16 -; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) -; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[4:7], off -; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-GISEL-LABEL: freeze_v6i32: -; GFX11-GISEL: ; %bb.0: -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: s_clause 0x1 -; GFX11-GISEL-NEXT: global_load_b128 v[4:7], v[0:1], off -; GFX11-GISEL-NEXT: global_load_b64 v[0:1], v[0:1], off offset:16 -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(1) -; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[4:7], off -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) -; GFX11-GISEL-NEXT: global_store_b64 v[2:3], v[0:1], off offset:16 -; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] - %a = load <6 x i32>, ptr addrspace(1) %ptra, align 4 - %freeze = freeze <6 x i32> %a - store <6 x i32> %freeze, ptr addrspace(1) %ptrb, align 4 - ret void -} - -define void @freeze_v7i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { -; GFX10-SDAG-LABEL: freeze_v7i32: -; GFX10-SDAG: ; %bb.0: -; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SDAG-NEXT: s_clause 0x1 -; GFX10-SDAG-NEXT: global_load_dwordx3 v[8:10], v[0:1], off offset:16 -; GFX10-SDAG-NEXT: global_load_dwordx4 v[4:7], v[0:1], off -; GFX10-SDAG-NEXT: s_waitcnt vmcnt(1) -; GFX10-SDAG-NEXT: global_store_dwordx3 v[2:3], v[8:10], off offset:16 -; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) -; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[4:7], off -; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GFX10-GISEL-LABEL: freeze_v7i32: -; GFX10-GISEL: ; %bb.0: -; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: s_clause 0x1 -; GFX10-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off -; GFX10-GISEL-NEXT: global_load_dwordx3 v[8:10], v[0:1], off offset:16 -; GFX10-GISEL-NEXT: s_waitcnt vmcnt(1) -; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off -; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) -; GFX10-GISEL-NEXT: global_store_dwordx3 v[2:3], v[8:10], off offset:16 -; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-SDAG-LABEL: freeze_v7i32: -; GFX11-SDAG: ; %bb.0: -; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-NEXT: s_clause 0x1 -; GFX11-SDAG-NEXT: global_load_b96 v[8:10], v[0:1], off offset:16 -; GFX11-SDAG-NEXT: global_load_b128 v[4:7], v[0:1], off -; GFX11-SDAG-NEXT: s_waitcnt vmcnt(1) -; GFX11-SDAG-NEXT: global_store_b96 v[2:3], v[8:10], off offset:16 -; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) -; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[4:7], off -; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-GISEL-LABEL: freeze_v7i32: -; GFX11-GISEL: ; %bb.0: -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: s_clause 0x1 -; GFX11-GISEL-NEXT: global_load_b128 v[4:7], v[0:1], off -; GFX11-GISEL-NEXT: global_load_b96 v[8:10], v[0:1], off offset:16 -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(1) -; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[4:7], off -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) -; GFX11-GISEL-NEXT: global_store_b96 v[2:3], v[8:10], off offset:16 -; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] - %a = load <7 x i32>, ptr addrspace(1) %ptra, align 4 - %freeze = freeze <7 x i32> %a - store <7 x i32> %freeze, ptr addrspace(1) %ptrb, align 4 - ret void -} - -define void @freeze_v8i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { -; GFX10-SDAG-LABEL: freeze_v8i32: -; GFX10-SDAG: ; %bb.0: -; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SDAG-NEXT: s_clause 0x1 -; 
GFX10-SDAG-NEXT: global_load_dwordx4 v[4:7], v[0:1], off offset:16 -; GFX10-SDAG-NEXT: global_load_dwordx4 v[8:11], v[0:1], off -; GFX10-SDAG-NEXT: s_waitcnt vmcnt(1) -; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[4:7], off offset:16 -; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) -; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[8:11], off -; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GFX10-GISEL-LABEL: freeze_v8i32: -; GFX10-GISEL: ; %bb.0: -; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: s_clause 0x1 -; GFX10-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off -; GFX10-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16 -; GFX10-GISEL-NEXT: s_waitcnt vmcnt(1) -; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off -; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) -; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16 -; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-SDAG-LABEL: freeze_v8i32: -; GFX11-SDAG: ; %bb.0: -; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-NEXT: s_clause 0x1 -; GFX11-SDAG-NEXT: global_load_b128 v[4:7], v[0:1], off offset:16 -; GFX11-SDAG-NEXT: global_load_b128 v[8:11], v[0:1], off -; GFX11-SDAG-NEXT: s_waitcnt vmcnt(1) -; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[4:7], off offset:16 -; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) -; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[8:11], off -; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-GISEL-LABEL: freeze_v8i32: -; GFX11-GISEL: ; %bb.0: -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: s_clause 0x1 -; GFX11-GISEL-NEXT: global_load_b128 v[4:7], v[0:1], off -; GFX11-GISEL-NEXT: global_load_b128 v[8:11], v[0:1], off offset:16 -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(1) -; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[4:7], off -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) -; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[8:11], off offset:16 -; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] - %a = load <8 x i32>, ptr addrspace(1) %ptra, align 4 - %freeze = freeze <8 x i32> %a - store <8 x i32> %freeze, ptr addrspace(1) %ptrb, align 4 - ret void -} - -define void @freeze_v9i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { -; GFX10-SDAG-LABEL: freeze_v9i32: -; GFX10-SDAG: ; %bb.0: -; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SDAG-NEXT: s_clause 0x2 -; GFX10-SDAG-NEXT: global_load_dword v12, v[0:1], off offset:32 -; GFX10-SDAG-NEXT: global_load_dwordx4 v[4:7], v[0:1], off -; GFX10-SDAG-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16 -; GFX10-SDAG-NEXT: s_waitcnt vmcnt(2) -; GFX10-SDAG-NEXT: global_store_dword v[2:3], v12, off offset:32 -; GFX10-SDAG-NEXT: s_waitcnt vmcnt(1) -; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[4:7], off -; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) -; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16 -; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GFX10-GISEL-LABEL: freeze_v9i32: -; GFX10-GISEL: ; %bb.0: -; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: s_clause 0x2 -; GFX10-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off -; GFX10-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16 -; GFX10-GISEL-NEXT: global_load_dword v12, v[0:1], off offset:32 -; GFX10-GISEL-NEXT: s_waitcnt vmcnt(2) -; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off -; GFX10-GISEL-NEXT: s_waitcnt vmcnt(1) -; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16 -; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) -; 
GFX10-GISEL-NEXT: global_store_dword v[2:3], v12, off offset:32 -; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-SDAG-LABEL: freeze_v9i32: -; GFX11-SDAG: ; %bb.0: -; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-NEXT: s_clause 0x2 -; GFX11-SDAG-NEXT: global_load_b32 v12, v[0:1], off offset:32 -; GFX11-SDAG-NEXT: global_load_b128 v[4:7], v[0:1], off -; GFX11-SDAG-NEXT: global_load_b128 v[8:11], v[0:1], off offset:16 -; GFX11-SDAG-NEXT: s_waitcnt vmcnt(2) -; GFX11-SDAG-NEXT: global_store_b32 v[2:3], v12, off offset:32 -; GFX11-SDAG-NEXT: s_waitcnt vmcnt(1) -; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[4:7], off -; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) -; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[8:11], off offset:16 -; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-GISEL-LABEL: freeze_v9i32: -; GFX11-GISEL: ; %bb.0: -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: s_clause 0x2 -; GFX11-GISEL-NEXT: global_load_b128 v[4:7], v[0:1], off -; GFX11-GISEL-NEXT: global_load_b128 v[8:11], v[0:1], off offset:16 -; GFX11-GISEL-NEXT: global_load_b32 v0, v[0:1], off offset:32 -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(2) -; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[4:7], off -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(1) -; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[8:11], off offset:16 -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) -; GFX11-GISEL-NEXT: global_store_b32 v[2:3], v0, off offset:32 -; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] - %a = load <9 x i32>, ptr addrspace(1) %ptra, align 4 - %freeze = freeze <9 x i32> %a - store <9 x i32> %freeze, ptr addrspace(1) %ptrb, align 4 - ret void -} - -define void @freeze_v10i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { -; GFX10-LABEL: freeze_v10i32: -; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_clause 0x2 -; GFX10-NEXT: global_load_dwordx4 v[4:7], v[0:1], off -; GFX10-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16 -; GFX10-NEXT: global_load_dwordx2 v[12:13], v[0:1], off offset:32 -; GFX10-NEXT: s_waitcnt vmcnt(2) -; GFX10-NEXT: global_store_dwordx4 v[2:3], v[4:7], off -; GFX10-NEXT: s_waitcnt vmcnt(1) -; GFX10-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16 -; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: global_store_dwordx2 v[2:3], v[12:13], off offset:32 -; GFX10-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-LABEL: freeze_v10i32: -; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_clause 0x2 -; GFX11-NEXT: global_load_b128 v[4:7], v[0:1], off -; GFX11-NEXT: global_load_b128 v[8:11], v[0:1], off offset:16 -; GFX11-NEXT: global_load_b64 v[0:1], v[0:1], off offset:32 -; GFX11-NEXT: s_waitcnt vmcnt(2) -; GFX11-NEXT: global_store_b128 v[2:3], v[4:7], off -; GFX11-NEXT: s_waitcnt vmcnt(1) -; GFX11-NEXT: global_store_b128 v[2:3], v[8:11], off offset:16 -; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: global_store_b64 v[2:3], v[0:1], off offset:32 -; GFX11-NEXT: s_setpc_b64 s[30:31] - %a = load <10 x i32>, ptr addrspace(1) %ptra, align 4 - %freeze = freeze <10 x i32> %a - store <10 x i32> %freeze, ptr addrspace(1) %ptrb, align 4 - ret void -} - -define void @freeze_v11i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { -; GFX10-LABEL: freeze_v11i32: -; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_clause 0x2 -; GFX10-NEXT: global_load_dwordx4 v[4:7], v[0:1], off -; GFX10-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16 -; GFX10-NEXT: global_load_dwordx3 
v[12:14], v[0:1], off offset:32 -; GFX10-NEXT: s_waitcnt vmcnt(2) -; GFX10-NEXT: global_store_dwordx4 v[2:3], v[4:7], off -; GFX10-NEXT: s_waitcnt vmcnt(1) -; GFX10-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16 -; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: global_store_dwordx3 v[2:3], v[12:14], off offset:32 -; GFX10-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-LABEL: freeze_v11i32: -; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_clause 0x2 -; GFX11-NEXT: global_load_b128 v[4:7], v[0:1], off -; GFX11-NEXT: global_load_b128 v[8:11], v[0:1], off offset:16 -; GFX11-NEXT: global_load_b96 v[12:14], v[0:1], off offset:32 -; GFX11-NEXT: s_waitcnt vmcnt(2) -; GFX11-NEXT: global_store_b128 v[2:3], v[4:7], off -; GFX11-NEXT: s_waitcnt vmcnt(1) -; GFX11-NEXT: global_store_b128 v[2:3], v[8:11], off offset:16 -; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: global_store_b96 v[2:3], v[12:14], off offset:32 -; GFX11-NEXT: s_setpc_b64 s[30:31] - %a = load <11 x i32>, ptr addrspace(1) %ptra, align 4 - %freeze = freeze <11 x i32> %a - store <11 x i32> %freeze, ptr addrspace(1) %ptrb, align 4 - ret void -} - -define void @freeze_v12i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { -; GFX10-LABEL: freeze_v12i32: -; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_clause 0x2 -; GFX10-NEXT: global_load_dwordx4 v[4:7], v[0:1], off -; GFX10-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16 -; GFX10-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:32 -; GFX10-NEXT: s_waitcnt vmcnt(2) -; GFX10-NEXT: global_store_dwordx4 v[2:3], v[4:7], off -; GFX10-NEXT: s_waitcnt vmcnt(1) -; GFX10-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16 -; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:32 -; GFX10-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-LABEL: freeze_v12i32: -; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_clause 0x2 -; GFX11-NEXT: global_load_b128 v[4:7], v[0:1], off -; GFX11-NEXT: global_load_b128 v[8:11], v[0:1], off offset:16 -; GFX11-NEXT: global_load_b128 v[12:15], v[0:1], off offset:32 -; GFX11-NEXT: s_waitcnt vmcnt(2) -; GFX11-NEXT: global_store_b128 v[2:3], v[4:7], off -; GFX11-NEXT: s_waitcnt vmcnt(1) -; GFX11-NEXT: global_store_b128 v[2:3], v[8:11], off offset:16 -; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: global_store_b128 v[2:3], v[12:15], off offset:32 -; GFX11-NEXT: s_setpc_b64 s[30:31] - %a = load <12 x i32>, ptr addrspace(1) %ptra, align 4 - %freeze = freeze <12 x i32> %a - store <12 x i32> %freeze, ptr addrspace(1) %ptrb, align 4 - ret void -} -define void @freeze_v13i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { -; GFX10-SDAG-LABEL: freeze_v13i32: -; GFX10-SDAG: ; %bb.0: -; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SDAG-NEXT: s_clause 0x3 -; GFX10-SDAG-NEXT: global_load_dword v16, v[0:1], off offset:48 -; GFX10-SDAG-NEXT: global_load_dwordx4 v[4:7], v[0:1], off offset:32 -; GFX10-SDAG-NEXT: global_load_dwordx4 v[8:11], v[0:1], off -; GFX10-SDAG-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:16 -; GFX10-SDAG-NEXT: s_waitcnt vmcnt(3) -; GFX10-SDAG-NEXT: global_store_dword v[2:3], v16, off offset:48 -; GFX10-SDAG-NEXT: s_waitcnt vmcnt(2) -; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[4:7], off offset:32 -; GFX10-SDAG-NEXT: s_waitcnt vmcnt(1) -; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[8:11], off -; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) -; 
GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:16 -; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GFX10-GISEL-LABEL: freeze_v13i32: -; GFX10-GISEL: ; %bb.0: -; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: s_clause 0x3 -; GFX10-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off -; GFX10-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16 -; GFX10-GISEL-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:32 -; GFX10-GISEL-NEXT: global_load_dword v16, v[0:1], off offset:48 -; GFX10-GISEL-NEXT: s_waitcnt vmcnt(3) -; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off -; GFX10-GISEL-NEXT: s_waitcnt vmcnt(2) -; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16 -; GFX10-GISEL-NEXT: s_waitcnt vmcnt(1) -; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:32 -; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) -; GFX10-GISEL-NEXT: global_store_dword v[2:3], v16, off offset:48 -; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-SDAG-LABEL: freeze_v13i32: -; GFX11-SDAG: ; %bb.0: -; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-NEXT: s_clause 0x3 -; GFX11-SDAG-NEXT: global_load_b32 v16, v[0:1], off offset:48 -; GFX11-SDAG-NEXT: global_load_b128 v[4:7], v[0:1], off offset:32 -; GFX11-SDAG-NEXT: global_load_b128 v[8:11], v[0:1], off -; GFX11-SDAG-NEXT: global_load_b128 v[12:15], v[0:1], off offset:16 -; GFX11-SDAG-NEXT: s_waitcnt vmcnt(3) -; GFX11-SDAG-NEXT: global_store_b32 v[2:3], v16, off offset:48 -; GFX11-SDAG-NEXT: s_waitcnt vmcnt(2) -; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[4:7], off offset:32 -; GFX11-SDAG-NEXT: s_waitcnt vmcnt(1) -; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[8:11], off -; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) -; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[12:15], off offset:16 -; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-GISEL-LABEL: freeze_v13i32: -; GFX11-GISEL: ; %bb.0: -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: s_clause 0x3 -; GFX11-GISEL-NEXT: global_load_b128 v[4:7], v[0:1], off -; GFX11-GISEL-NEXT: global_load_b128 v[8:11], v[0:1], off offset:16 -; GFX11-GISEL-NEXT: global_load_b128 v[12:15], v[0:1], off offset:32 -; GFX11-GISEL-NEXT: global_load_b32 v0, v[0:1], off offset:48 -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(3) -; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[4:7], off -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(2) -; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[8:11], off offset:16 -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(1) -; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[12:15], off offset:32 -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) -; GFX11-GISEL-NEXT: global_store_b32 v[2:3], v0, off offset:48 -; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] - %a = load <13 x i32>, ptr addrspace(1) %ptra, align 4 - %freeze = freeze <13 x i32> %a - store <13 x i32> %freeze, ptr addrspace(1) %ptrb, align 4 - ret void -} - -define void @freeze_v14i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { -; GFX10-SDAG-LABEL: freeze_v14i32: -; GFX10-SDAG: ; %bb.0: -; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SDAG-NEXT: s_clause 0x3 -; GFX10-SDAG-NEXT: global_load_dwordx4 v[4:7], v[0:1], off offset:32 -; GFX10-SDAG-NEXT: global_load_dwordx2 v[16:17], v[0:1], off offset:48 -; GFX10-SDAG-NEXT: global_load_dwordx4 v[8:11], v[0:1], off -; GFX10-SDAG-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:16 -; GFX10-SDAG-NEXT: s_waitcnt vmcnt(3) -; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[4:7], off 
offset:32 -; GFX10-SDAG-NEXT: s_waitcnt vmcnt(2) -; GFX10-SDAG-NEXT: global_store_dwordx2 v[2:3], v[16:17], off offset:48 -; GFX10-SDAG-NEXT: s_waitcnt vmcnt(1) -; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[8:11], off -; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) -; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:16 -; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GFX10-GISEL-LABEL: freeze_v14i32: -; GFX10-GISEL: ; %bb.0: -; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: s_clause 0x3 -; GFX10-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off -; GFX10-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16 -; GFX10-GISEL-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:32 -; GFX10-GISEL-NEXT: global_load_dwordx2 v[16:17], v[0:1], off offset:48 -; GFX10-GISEL-NEXT: s_waitcnt vmcnt(3) -; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off -; GFX10-GISEL-NEXT: s_waitcnt vmcnt(2) -; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16 -; GFX10-GISEL-NEXT: s_waitcnt vmcnt(1) -; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:32 -; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) -; GFX10-GISEL-NEXT: global_store_dwordx2 v[2:3], v[16:17], off offset:48 -; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-SDAG-LABEL: freeze_v14i32: -; GFX11-SDAG: ; %bb.0: -; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-NEXT: s_clause 0x3 -; GFX11-SDAG-NEXT: global_load_b128 v[4:7], v[0:1], off offset:32 -; GFX11-SDAG-NEXT: global_load_b64 v[16:17], v[0:1], off offset:48 -; GFX11-SDAG-NEXT: global_load_b128 v[8:11], v[0:1], off -; GFX11-SDAG-NEXT: global_load_b128 v[12:15], v[0:1], off offset:16 -; GFX11-SDAG-NEXT: s_waitcnt vmcnt(3) -; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[4:7], off offset:32 -; GFX11-SDAG-NEXT: s_waitcnt vmcnt(2) -; GFX11-SDAG-NEXT: global_store_b64 v[2:3], v[16:17], off offset:48 -; GFX11-SDAG-NEXT: s_waitcnt vmcnt(1) -; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[8:11], off -; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) -; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[12:15], off offset:16 -; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-GISEL-LABEL: freeze_v14i32: -; GFX11-GISEL: ; %bb.0: -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: s_clause 0x3 -; GFX11-GISEL-NEXT: global_load_b128 v[4:7], v[0:1], off -; GFX11-GISEL-NEXT: global_load_b128 v[8:11], v[0:1], off offset:16 -; GFX11-GISEL-NEXT: global_load_b128 v[12:15], v[0:1], off offset:32 -; GFX11-GISEL-NEXT: global_load_b64 v[0:1], v[0:1], off offset:48 -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(3) -; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[4:7], off -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(2) -; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[8:11], off offset:16 -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(1) -; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[12:15], off offset:32 -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) -; GFX11-GISEL-NEXT: global_store_b64 v[2:3], v[0:1], off offset:48 -; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] - %a = load <14 x i32>, ptr addrspace(1) %ptra, align 4 - %freeze = freeze <14 x i32> %a - store <14 x i32> %freeze, ptr addrspace(1) %ptrb, align 4 - ret void -} - -define void @freeze_v15i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { -; GFX10-SDAG-LABEL: freeze_v15i32: -; GFX10-SDAG: ; %bb.0: -; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SDAG-NEXT: s_clause 0x3 -; GFX10-SDAG-NEXT: global_load_dwordx4 v[4:7], v[0:1], off offset:32 -; 
GFX10-SDAG-NEXT: global_load_dwordx3 v[16:18], v[0:1], off offset:48 -; GFX10-SDAG-NEXT: global_load_dwordx4 v[8:11], v[0:1], off -; GFX10-SDAG-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:16 -; GFX10-SDAG-NEXT: s_waitcnt vmcnt(3) -; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[4:7], off offset:32 -; GFX10-SDAG-NEXT: s_waitcnt vmcnt(2) -; GFX10-SDAG-NEXT: global_store_dwordx3 v[2:3], v[16:18], off offset:48 -; GFX10-SDAG-NEXT: s_waitcnt vmcnt(1) -; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[8:11], off -; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) -; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:16 -; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GFX10-GISEL-LABEL: freeze_v15i32: -; GFX10-GISEL: ; %bb.0: -; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: s_clause 0x3 -; GFX10-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off -; GFX10-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16 -; GFX10-GISEL-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:32 -; GFX10-GISEL-NEXT: global_load_dwordx3 v[16:18], v[0:1], off offset:48 -; GFX10-GISEL-NEXT: s_waitcnt vmcnt(3) -; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off -; GFX10-GISEL-NEXT: s_waitcnt vmcnt(2) -; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16 -; GFX10-GISEL-NEXT: s_waitcnt vmcnt(1) -; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:32 -; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) -; GFX10-GISEL-NEXT: global_store_dwordx3 v[2:3], v[16:18], off offset:48 -; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-SDAG-LABEL: freeze_v15i32: -; GFX11-SDAG: ; %bb.0: -; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-NEXT: s_clause 0x3 -; GFX11-SDAG-NEXT: global_load_b128 v[4:7], v[0:1], off offset:32 -; GFX11-SDAG-NEXT: global_load_b96 v[16:18], v[0:1], off offset:48 -; GFX11-SDAG-NEXT: global_load_b128 v[8:11], v[0:1], off -; GFX11-SDAG-NEXT: global_load_b128 v[12:15], v[0:1], off offset:16 -; GFX11-SDAG-NEXT: s_waitcnt vmcnt(3) -; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[4:7], off offset:32 -; GFX11-SDAG-NEXT: s_waitcnt vmcnt(2) -; GFX11-SDAG-NEXT: global_store_b96 v[2:3], v[16:18], off offset:48 -; GFX11-SDAG-NEXT: s_waitcnt vmcnt(1) -; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[8:11], off -; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) -; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[12:15], off offset:16 -; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-GISEL-LABEL: freeze_v15i32: -; GFX11-GISEL: ; %bb.0: -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: s_clause 0x3 -; GFX11-GISEL-NEXT: global_load_b128 v[4:7], v[0:1], off -; GFX11-GISEL-NEXT: global_load_b128 v[8:11], v[0:1], off offset:16 -; GFX11-GISEL-NEXT: global_load_b128 v[12:15], v[0:1], off offset:32 -; GFX11-GISEL-NEXT: global_load_b96 v[16:18], v[0:1], off offset:48 -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(3) -; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[4:7], off -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(2) -; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[8:11], off offset:16 -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(1) -; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[12:15], off offset:32 -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) -; GFX11-GISEL-NEXT: global_store_b96 v[2:3], v[16:18], off offset:48 -; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] - %a = load <15 x i32>, ptr addrspace(1) %ptra, align 4 - %freeze = freeze <15 x i32> %a - store <15 x i32> %freeze, ptr addrspace(1) %ptrb, align 4 - ret void -} - -define 
void @freeze_v16i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { -; GFX10-SDAG-LABEL: freeze_v16i32: -; GFX10-SDAG: ; %bb.0: -; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SDAG-NEXT: s_clause 0x3 -; GFX10-SDAG-NEXT: global_load_dwordx4 v[4:7], v[0:1], off offset:32 -; GFX10-SDAG-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:48 -; GFX10-SDAG-NEXT: global_load_dwordx4 v[12:15], v[0:1], off -; GFX10-SDAG-NEXT: global_load_dwordx4 v[16:19], v[0:1], off offset:16 -; GFX10-SDAG-NEXT: s_waitcnt vmcnt(3) -; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[4:7], off offset:32 -; GFX10-SDAG-NEXT: s_waitcnt vmcnt(2) -; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:48 -; GFX10-SDAG-NEXT: s_waitcnt vmcnt(1) -; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[12:15], off -; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) -; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[16:19], off offset:16 -; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GFX10-GISEL-LABEL: freeze_v16i32: -; GFX10-GISEL: ; %bb.0: -; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: s_clause 0x3 -; GFX10-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off -; GFX10-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16 -; GFX10-GISEL-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:32 -; GFX10-GISEL-NEXT: global_load_dwordx4 v[16:19], v[0:1], off offset:48 -; GFX10-GISEL-NEXT: s_waitcnt vmcnt(3) -; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off -; GFX10-GISEL-NEXT: s_waitcnt vmcnt(2) -; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16 -; GFX10-GISEL-NEXT: s_waitcnt vmcnt(1) -; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:32 -; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) -; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[16:19], off offset:48 -; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-SDAG-LABEL: freeze_v16i32: -; GFX11-SDAG: ; %bb.0: -; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-NEXT: s_clause 0x3 -; GFX11-SDAG-NEXT: global_load_b128 v[4:7], v[0:1], off offset:32 -; GFX11-SDAG-NEXT: global_load_b128 v[8:11], v[0:1], off offset:48 -; GFX11-SDAG-NEXT: global_load_b128 v[12:15], v[0:1], off -; GFX11-SDAG-NEXT: global_load_b128 v[16:19], v[0:1], off offset:16 -; GFX11-SDAG-NEXT: s_waitcnt vmcnt(3) -; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[4:7], off offset:32 -; GFX11-SDAG-NEXT: s_waitcnt vmcnt(2) -; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[8:11], off offset:48 -; GFX11-SDAG-NEXT: s_waitcnt vmcnt(1) -; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[12:15], off -; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) -; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[16:19], off offset:16 -; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-GISEL-LABEL: freeze_v16i32: -; GFX11-GISEL: ; %bb.0: -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: s_clause 0x3 -; GFX11-GISEL-NEXT: global_load_b128 v[4:7], v[0:1], off -; GFX11-GISEL-NEXT: global_load_b128 v[8:11], v[0:1], off offset:16 -; GFX11-GISEL-NEXT: global_load_b128 v[12:15], v[0:1], off offset:32 -; GFX11-GISEL-NEXT: global_load_b128 v[16:19], v[0:1], off offset:48 -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(3) -; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[4:7], off -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(2) -; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[8:11], off offset:16 -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(1) -; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[12:15], off offset:32 -; GFX11-GISEL-NEXT: 
s_waitcnt vmcnt(0) -; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[16:19], off offset:48 -; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] - %a = load <16 x i32>, ptr addrspace(1) %ptra, align 4 - %freeze = freeze <16 x i32> %a - store <16 x i32> %freeze, ptr addrspace(1) %ptrb, align 4 - ret void -} - -define void @freeze_v17i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { -; GFX10-SDAG-LABEL: freeze_v17i32: -; GFX10-SDAG: ; %bb.0: -; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SDAG-NEXT: s_clause 0x4 -; GFX10-SDAG-NEXT: global_load_dword v20, v[0:1], off offset:64 -; GFX10-SDAG-NEXT: global_load_dwordx4 v[4:7], v[0:1], off offset:32 -; GFX10-SDAG-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:48 -; GFX10-SDAG-NEXT: global_load_dwordx4 v[12:15], v[0:1], off -; GFX10-SDAG-NEXT: global_load_dwordx4 v[16:19], v[0:1], off offset:16 -; GFX10-SDAG-NEXT: s_waitcnt vmcnt(4) -; GFX10-SDAG-NEXT: global_store_dword v[2:3], v20, off offset:64 -; GFX10-SDAG-NEXT: s_waitcnt vmcnt(3) -; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[4:7], off offset:32 -; GFX10-SDAG-NEXT: s_waitcnt vmcnt(2) -; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:48 -; GFX10-SDAG-NEXT: s_waitcnt vmcnt(1) -; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[12:15], off -; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) -; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[16:19], off offset:16 -; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GFX10-GISEL-LABEL: freeze_v17i32: -; GFX10-GISEL: ; %bb.0: -; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: s_clause 0x4 -; GFX10-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off -; GFX10-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16 -; GFX10-GISEL-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:32 -; GFX10-GISEL-NEXT: global_load_dwordx4 v[16:19], v[0:1], off offset:48 -; GFX10-GISEL-NEXT: global_load_dword v20, v[0:1], off offset:64 -; GFX10-GISEL-NEXT: s_waitcnt vmcnt(4) -; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off -; GFX10-GISEL-NEXT: s_waitcnt vmcnt(3) -; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16 -; GFX10-GISEL-NEXT: s_waitcnt vmcnt(2) -; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:32 -; GFX10-GISEL-NEXT: s_waitcnt vmcnt(1) -; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[16:19], off offset:48 -; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) -; GFX10-GISEL-NEXT: global_store_dword v[2:3], v20, off offset:64 -; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-SDAG-LABEL: freeze_v17i32: -; GFX11-SDAG: ; %bb.0: -; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-NEXT: s_clause 0x4 -; GFX11-SDAG-NEXT: global_load_b32 v20, v[0:1], off offset:64 -; GFX11-SDAG-NEXT: global_load_b128 v[4:7], v[0:1], off offset:32 -; GFX11-SDAG-NEXT: global_load_b128 v[8:11], v[0:1], off offset:48 -; GFX11-SDAG-NEXT: global_load_b128 v[12:15], v[0:1], off -; GFX11-SDAG-NEXT: global_load_b128 v[16:19], v[0:1], off offset:16 -; GFX11-SDAG-NEXT: s_waitcnt vmcnt(4) -; GFX11-SDAG-NEXT: global_store_b32 v[2:3], v20, off offset:64 -; GFX11-SDAG-NEXT: s_waitcnt vmcnt(3) -; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[4:7], off offset:32 -; GFX11-SDAG-NEXT: s_waitcnt vmcnt(2) -; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[8:11], off offset:48 -; GFX11-SDAG-NEXT: s_waitcnt vmcnt(1) -; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[12:15], off -; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) -; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[16:19], off offset:16 
-; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-GISEL-LABEL: freeze_v17i32: -; GFX11-GISEL: ; %bb.0: -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: s_clause 0x4 -; GFX11-GISEL-NEXT: global_load_b128 v[4:7], v[0:1], off -; GFX11-GISEL-NEXT: global_load_b128 v[8:11], v[0:1], off offset:16 -; GFX11-GISEL-NEXT: global_load_b128 v[12:15], v[0:1], off offset:32 -; GFX11-GISEL-NEXT: global_load_b128 v[16:19], v[0:1], off offset:48 -; GFX11-GISEL-NEXT: global_load_b32 v0, v[0:1], off offset:64 -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(4) -; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[4:7], off -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(3) -; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[8:11], off offset:16 -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(2) -; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[12:15], off offset:32 -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(1) -; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[16:19], off offset:48 -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) -; GFX11-GISEL-NEXT: global_store_b32 v[2:3], v0, off offset:64 -; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] - %a = load <17 x i32>, ptr addrspace(1) %ptra, align 4 - %freeze = freeze <17 x i32> %a - store <17 x i32> %freeze, ptr addrspace(1) %ptrb, align 4 - ret void -} - -define void @freeze_v18i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { -; GFX10-SDAG-LABEL: freeze_v18i32: -; GFX10-SDAG: ; %bb.0: -; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SDAG-NEXT: s_clause 0x4 -; GFX10-SDAG-NEXT: global_load_dwordx2 v[20:21], v[0:1], off offset:64 -; GFX10-SDAG-NEXT: global_load_dwordx4 v[4:7], v[0:1], off offset:32 -; GFX10-SDAG-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:48 -; GFX10-SDAG-NEXT: global_load_dwordx4 v[12:15], v[0:1], off -; GFX10-SDAG-NEXT: global_load_dwordx4 v[16:19], v[0:1], off offset:16 -; GFX10-SDAG-NEXT: s_waitcnt vmcnt(4) -; GFX10-SDAG-NEXT: global_store_dwordx2 v[2:3], v[20:21], off offset:64 -; GFX10-SDAG-NEXT: s_waitcnt vmcnt(3) -; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[4:7], off offset:32 -; GFX10-SDAG-NEXT: s_waitcnt vmcnt(2) -; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:48 -; GFX10-SDAG-NEXT: s_waitcnt vmcnt(1) -; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[12:15], off -; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) -; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[16:19], off offset:16 -; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GFX10-GISEL-LABEL: freeze_v18i32: -; GFX10-GISEL: ; %bb.0: -; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: s_clause 0x4 -; GFX10-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off -; GFX10-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16 -; GFX10-GISEL-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:32 -; GFX10-GISEL-NEXT: global_load_dwordx4 v[16:19], v[0:1], off offset:48 -; GFX10-GISEL-NEXT: global_load_dwordx2 v[20:21], v[0:1], off offset:64 -; GFX10-GISEL-NEXT: s_waitcnt vmcnt(4) -; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off -; GFX10-GISEL-NEXT: s_waitcnt vmcnt(3) -; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16 -; GFX10-GISEL-NEXT: s_waitcnt vmcnt(2) -; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:32 -; GFX10-GISEL-NEXT: s_waitcnt vmcnt(1) -; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[16:19], off offset:48 -; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) -; GFX10-GISEL-NEXT: global_store_dwordx2 v[2:3], v[20:21], off offset:64 -; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] -; 
-; GFX11-SDAG-LABEL: freeze_v18i32: -; GFX11-SDAG: ; %bb.0: -; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-NEXT: s_clause 0x4 -; GFX11-SDAG-NEXT: global_load_b64 v[20:21], v[0:1], off offset:64 -; GFX11-SDAG-NEXT: global_load_b128 v[4:7], v[0:1], off offset:32 -; GFX11-SDAG-NEXT: global_load_b128 v[8:11], v[0:1], off offset:48 -; GFX11-SDAG-NEXT: global_load_b128 v[12:15], v[0:1], off -; GFX11-SDAG-NEXT: global_load_b128 v[16:19], v[0:1], off offset:16 -; GFX11-SDAG-NEXT: s_waitcnt vmcnt(4) -; GFX11-SDAG-NEXT: global_store_b64 v[2:3], v[20:21], off offset:64 -; GFX11-SDAG-NEXT: s_waitcnt vmcnt(3) -; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[4:7], off offset:32 -; GFX11-SDAG-NEXT: s_waitcnt vmcnt(2) -; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[8:11], off offset:48 -; GFX11-SDAG-NEXT: s_waitcnt vmcnt(1) -; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[12:15], off -; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) -; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[16:19], off offset:16 -; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-GISEL-LABEL: freeze_v18i32: -; GFX11-GISEL: ; %bb.0: -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: s_clause 0x4 -; GFX11-GISEL-NEXT: global_load_b128 v[4:7], v[0:1], off -; GFX11-GISEL-NEXT: global_load_b128 v[8:11], v[0:1], off offset:16 -; GFX11-GISEL-NEXT: global_load_b128 v[12:15], v[0:1], off offset:32 -; GFX11-GISEL-NEXT: global_load_b128 v[16:19], v[0:1], off offset:48 -; GFX11-GISEL-NEXT: global_load_b64 v[0:1], v[0:1], off offset:64 -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(4) -; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[4:7], off -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(3) -; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[8:11], off offset:16 -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(2) -; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[12:15], off offset:32 -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(1) -; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[16:19], off offset:48 -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) -; GFX11-GISEL-NEXT: global_store_b64 v[2:3], v[0:1], off offset:64 -; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] - %a = load <18 x i32>, ptr addrspace(1) %ptra, align 4 - %freeze = freeze <18 x i32> %a - store <18 x i32> %freeze, ptr addrspace(1) %ptrb, align 4 - ret void -} - -define void @freeze_v19i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { -; GFX10-SDAG-LABEL: freeze_v19i32: -; GFX10-SDAG: ; %bb.0: -; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SDAG-NEXT: s_clause 0x4 -; GFX10-SDAG-NEXT: global_load_dwordx3 v[20:22], v[0:1], off offset:64 -; GFX10-SDAG-NEXT: global_load_dwordx4 v[4:7], v[0:1], off offset:32 -; GFX10-SDAG-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:48 -; GFX10-SDAG-NEXT: global_load_dwordx4 v[12:15], v[0:1], off -; GFX10-SDAG-NEXT: global_load_dwordx4 v[16:19], v[0:1], off offset:16 -; GFX10-SDAG-NEXT: s_waitcnt vmcnt(4) -; GFX10-SDAG-NEXT: global_store_dwordx3 v[2:3], v[20:22], off offset:64 -; GFX10-SDAG-NEXT: s_waitcnt vmcnt(3) -; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[4:7], off offset:32 -; GFX10-SDAG-NEXT: s_waitcnt vmcnt(2) -; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:48 -; GFX10-SDAG-NEXT: s_waitcnt vmcnt(1) -; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[12:15], off -; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) -; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[16:19], off offset:16 -; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GFX10-GISEL-LABEL: freeze_v19i32: -; GFX10-GISEL: ; %bb.0: -; GFX10-GISEL-NEXT: 
s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: s_clause 0x4 -; GFX10-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off -; GFX10-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16 -; GFX10-GISEL-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:32 -; GFX10-GISEL-NEXT: global_load_dwordx4 v[16:19], v[0:1], off offset:48 -; GFX10-GISEL-NEXT: global_load_dwordx3 v[20:22], v[0:1], off offset:64 -; GFX10-GISEL-NEXT: s_waitcnt vmcnt(4) -; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off -; GFX10-GISEL-NEXT: s_waitcnt vmcnt(3) -; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16 -; GFX10-GISEL-NEXT: s_waitcnt vmcnt(2) -; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:32 -; GFX10-GISEL-NEXT: s_waitcnt vmcnt(1) -; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[16:19], off offset:48 -; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) -; GFX10-GISEL-NEXT: global_store_dwordx3 v[2:3], v[20:22], off offset:64 -; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-SDAG-LABEL: freeze_v19i32: -; GFX11-SDAG: ; %bb.0: -; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-NEXT: s_clause 0x4 -; GFX11-SDAG-NEXT: global_load_b96 v[20:22], v[0:1], off offset:64 -; GFX11-SDAG-NEXT: global_load_b128 v[4:7], v[0:1], off offset:32 -; GFX11-SDAG-NEXT: global_load_b128 v[8:11], v[0:1], off offset:48 -; GFX11-SDAG-NEXT: global_load_b128 v[12:15], v[0:1], off -; GFX11-SDAG-NEXT: global_load_b128 v[16:19], v[0:1], off offset:16 -; GFX11-SDAG-NEXT: s_waitcnt vmcnt(4) -; GFX11-SDAG-NEXT: global_store_b96 v[2:3], v[20:22], off offset:64 -; GFX11-SDAG-NEXT: s_waitcnt vmcnt(3) -; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[4:7], off offset:32 -; GFX11-SDAG-NEXT: s_waitcnt vmcnt(2) -; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[8:11], off offset:48 -; GFX11-SDAG-NEXT: s_waitcnt vmcnt(1) -; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[12:15], off -; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) -; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[16:19], off offset:16 -; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-GISEL-LABEL: freeze_v19i32: -; GFX11-GISEL: ; %bb.0: -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: s_clause 0x4 -; GFX11-GISEL-NEXT: global_load_b128 v[4:7], v[0:1], off -; GFX11-GISEL-NEXT: global_load_b128 v[8:11], v[0:1], off offset:16 -; GFX11-GISEL-NEXT: global_load_b128 v[12:15], v[0:1], off offset:32 -; GFX11-GISEL-NEXT: global_load_b128 v[16:19], v[0:1], off offset:48 -; GFX11-GISEL-NEXT: global_load_b96 v[20:22], v[0:1], off offset:64 -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(4) -; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[4:7], off -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(3) -; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[8:11], off offset:16 -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(2) -; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[12:15], off offset:32 -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(1) -; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[16:19], off offset:48 -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) -; GFX11-GISEL-NEXT: global_store_b96 v[2:3], v[20:22], off offset:64 -; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] - %a = load <19 x i32>, ptr addrspace(1) %ptra, align 4 - %freeze = freeze <19 x i32> %a - store <19 x i32> %freeze, ptr addrspace(1) %ptrb, align 4 - ret void -} - -define void @freeze_v20i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { -; GFX10-SDAG-LABEL: freeze_v20i32: -; GFX10-SDAG: ; %bb.0: -; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SDAG-NEXT: s_clause 
0x4 -; GFX10-SDAG-NEXT: global_load_dwordx4 v[4:7], v[0:1], off offset:64 -; GFX10-SDAG-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:32 -; GFX10-SDAG-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:48 -; GFX10-SDAG-NEXT: global_load_dwordx4 v[16:19], v[0:1], off -; GFX10-SDAG-NEXT: global_load_dwordx4 v[20:23], v[0:1], off offset:16 -; GFX10-SDAG-NEXT: s_waitcnt vmcnt(4) -; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[4:7], off offset:64 -; GFX10-SDAG-NEXT: s_waitcnt vmcnt(3) -; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:32 -; GFX10-SDAG-NEXT: s_waitcnt vmcnt(2) -; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:48 -; GFX10-SDAG-NEXT: s_waitcnt vmcnt(1) -; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[16:19], off -; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) -; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[20:23], off offset:16 -; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GFX10-GISEL-LABEL: freeze_v20i32: -; GFX10-GISEL: ; %bb.0: -; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: s_clause 0x4 -; GFX10-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off -; GFX10-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16 -; GFX10-GISEL-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:32 -; GFX10-GISEL-NEXT: global_load_dwordx4 v[16:19], v[0:1], off offset:48 -; GFX10-GISEL-NEXT: global_load_dwordx4 v[20:23], v[0:1], off offset:64 -; GFX10-GISEL-NEXT: s_waitcnt vmcnt(4) -; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off -; GFX10-GISEL-NEXT: s_waitcnt vmcnt(3) -; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16 -; GFX10-GISEL-NEXT: s_waitcnt vmcnt(2) -; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:32 -; GFX10-GISEL-NEXT: s_waitcnt vmcnt(1) -; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[16:19], off offset:48 -; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) -; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[20:23], off offset:64 -; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-SDAG-LABEL: freeze_v20i32: -; GFX11-SDAG: ; %bb.0: -; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-NEXT: s_clause 0x4 -; GFX11-SDAG-NEXT: global_load_b128 v[4:7], v[0:1], off offset:64 -; GFX11-SDAG-NEXT: global_load_b128 v[8:11], v[0:1], off offset:32 -; GFX11-SDAG-NEXT: global_load_b128 v[12:15], v[0:1], off offset:48 -; GFX11-SDAG-NEXT: global_load_b128 v[16:19], v[0:1], off -; GFX11-SDAG-NEXT: global_load_b128 v[20:23], v[0:1], off offset:16 -; GFX11-SDAG-NEXT: s_waitcnt vmcnt(4) -; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[4:7], off offset:64 -; GFX11-SDAG-NEXT: s_waitcnt vmcnt(3) -; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[8:11], off offset:32 -; GFX11-SDAG-NEXT: s_waitcnt vmcnt(2) -; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[12:15], off offset:48 -; GFX11-SDAG-NEXT: s_waitcnt vmcnt(1) -; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[16:19], off -; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) -; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[20:23], off offset:16 -; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-GISEL-LABEL: freeze_v20i32: -; GFX11-GISEL: ; %bb.0: -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: s_clause 0x4 -; GFX11-GISEL-NEXT: global_load_b128 v[4:7], v[0:1], off -; GFX11-GISEL-NEXT: global_load_b128 v[8:11], v[0:1], off offset:16 -; GFX11-GISEL-NEXT: global_load_b128 v[12:15], v[0:1], off offset:32 -; GFX11-GISEL-NEXT: global_load_b128 v[16:19], v[0:1], off offset:48 -; 
GFX11-GISEL-NEXT: global_load_b128 v[20:23], v[0:1], off offset:64 -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(4) -; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[4:7], off -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(3) -; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[8:11], off offset:16 -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(2) -; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[12:15], off offset:32 -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(1) -; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[16:19], off offset:48 -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) -; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[20:23], off offset:64 -; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] - %a = load <20 x i32>, ptr addrspace(1) %ptra, align 4 - %freeze = freeze <20 x i32> %a - store <20 x i32> %freeze, ptr addrspace(1) %ptrb, align 4 - ret void -} - -define void @freeze_v21i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { -; GFX10-SDAG-LABEL: freeze_v21i32: -; GFX10-SDAG: ; %bb.0: -; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SDAG-NEXT: s_clause 0x5 -; GFX10-SDAG-NEXT: global_load_dword v24, v[0:1], off offset:80 -; GFX10-SDAG-NEXT: global_load_dwordx4 v[4:7], v[0:1], off offset:64 -; GFX10-SDAG-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:32 -; GFX10-SDAG-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:48 -; GFX10-SDAG-NEXT: global_load_dwordx4 v[16:19], v[0:1], off -; GFX10-SDAG-NEXT: global_load_dwordx4 v[20:23], v[0:1], off offset:16 -; GFX10-SDAG-NEXT: s_waitcnt vmcnt(5) -; GFX10-SDAG-NEXT: global_store_dword v[2:3], v24, off offset:80 -; GFX10-SDAG-NEXT: s_waitcnt vmcnt(4) -; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[4:7], off offset:64 -; GFX10-SDAG-NEXT: s_waitcnt vmcnt(3) -; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:32 -; GFX10-SDAG-NEXT: s_waitcnt vmcnt(2) -; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:48 -; GFX10-SDAG-NEXT: s_waitcnt vmcnt(1) -; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[16:19], off -; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) -; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[20:23], off offset:16 -; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GFX10-GISEL-LABEL: freeze_v21i32: -; GFX10-GISEL: ; %bb.0: -; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: s_clause 0x5 -; GFX10-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off -; GFX10-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16 -; GFX10-GISEL-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:32 -; GFX10-GISEL-NEXT: global_load_dwordx4 v[16:19], v[0:1], off offset:48 -; GFX10-GISEL-NEXT: global_load_dwordx4 v[20:23], v[0:1], off offset:64 -; GFX10-GISEL-NEXT: global_load_dword v24, v[0:1], off offset:80 -; GFX10-GISEL-NEXT: s_waitcnt vmcnt(5) -; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off -; GFX10-GISEL-NEXT: s_waitcnt vmcnt(4) -; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16 -; GFX10-GISEL-NEXT: s_waitcnt vmcnt(3) -; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:32 -; GFX10-GISEL-NEXT: s_waitcnt vmcnt(2) -; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[16:19], off offset:48 -; GFX10-GISEL-NEXT: s_waitcnt vmcnt(1) -; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[20:23], off offset:64 -; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) -; GFX10-GISEL-NEXT: global_store_dword v[2:3], v24, off offset:80 -; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-SDAG-LABEL: freeze_v21i32: -; GFX11-SDAG: ; %bb.0: -; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) 
lgkmcnt(0) -; GFX11-SDAG-NEXT: s_clause 0x5 -; GFX11-SDAG-NEXT: global_load_b32 v24, v[0:1], off offset:80 -; GFX11-SDAG-NEXT: global_load_b128 v[4:7], v[0:1], off offset:64 -; GFX11-SDAG-NEXT: global_load_b128 v[8:11], v[0:1], off offset:32 -; GFX11-SDAG-NEXT: global_load_b128 v[12:15], v[0:1], off offset:48 -; GFX11-SDAG-NEXT: global_load_b128 v[16:19], v[0:1], off -; GFX11-SDAG-NEXT: global_load_b128 v[20:23], v[0:1], off offset:16 -; GFX11-SDAG-NEXT: s_waitcnt vmcnt(5) -; GFX11-SDAG-NEXT: global_store_b32 v[2:3], v24, off offset:80 -; GFX11-SDAG-NEXT: s_waitcnt vmcnt(4) -; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[4:7], off offset:64 -; GFX11-SDAG-NEXT: s_waitcnt vmcnt(3) -; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[8:11], off offset:32 -; GFX11-SDAG-NEXT: s_waitcnt vmcnt(2) -; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[12:15], off offset:48 -; GFX11-SDAG-NEXT: s_waitcnt vmcnt(1) -; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[16:19], off -; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) -; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[20:23], off offset:16 -; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-GISEL-LABEL: freeze_v21i32: -; GFX11-GISEL: ; %bb.0: -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: s_clause 0x5 -; GFX11-GISEL-NEXT: global_load_b128 v[4:7], v[0:1], off -; GFX11-GISEL-NEXT: global_load_b128 v[8:11], v[0:1], off offset:16 -; GFX11-GISEL-NEXT: global_load_b128 v[12:15], v[0:1], off offset:32 -; GFX11-GISEL-NEXT: global_load_b128 v[16:19], v[0:1], off offset:48 -; GFX11-GISEL-NEXT: global_load_b128 v[20:23], v[0:1], off offset:64 -; GFX11-GISEL-NEXT: global_load_b32 v0, v[0:1], off offset:80 -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(5) -; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[4:7], off -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(4) -; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[8:11], off offset:16 -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(3) -; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[12:15], off offset:32 -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(2) -; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[16:19], off offset:48 -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(1) -; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[20:23], off offset:64 -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) -; GFX11-GISEL-NEXT: global_store_b32 v[2:3], v0, off offset:80 -; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] - %a = load <21 x i32>, ptr addrspace(1) %ptra, align 4 - %freeze = freeze <21 x i32> %a - store <21 x i32> %freeze, ptr addrspace(1) %ptrb, align 4 - ret void -} - -define void @freeze_v22i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { -; GFX10-SDAG-LABEL: freeze_v22i32: -; GFX10-SDAG: ; %bb.0: -; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SDAG-NEXT: s_clause 0x5 -; GFX10-SDAG-NEXT: global_load_dwordx4 v[4:7], v[0:1], off offset:64 -; GFX10-SDAG-NEXT: global_load_dwordx2 v[24:25], v[0:1], off offset:80 -; GFX10-SDAG-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:32 -; GFX10-SDAG-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:48 -; GFX10-SDAG-NEXT: global_load_dwordx4 v[16:19], v[0:1], off -; GFX10-SDAG-NEXT: global_load_dwordx4 v[20:23], v[0:1], off offset:16 -; GFX10-SDAG-NEXT: s_waitcnt vmcnt(5) -; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[4:7], off offset:64 -; GFX10-SDAG-NEXT: s_waitcnt vmcnt(4) -; GFX10-SDAG-NEXT: global_store_dwordx2 v[2:3], v[24:25], off offset:80 -; GFX10-SDAG-NEXT: s_waitcnt vmcnt(3) -; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:32 -; GFX10-SDAG-NEXT: s_waitcnt vmcnt(2) -; 
GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:48 -; GFX10-SDAG-NEXT: s_waitcnt vmcnt(1) -; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[16:19], off -; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) -; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[20:23], off offset:16 -; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GFX10-GISEL-LABEL: freeze_v22i32: -; GFX10-GISEL: ; %bb.0: -; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: s_clause 0x5 -; GFX10-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off -; GFX10-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16 -; GFX10-GISEL-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:32 -; GFX10-GISEL-NEXT: global_load_dwordx4 v[16:19], v[0:1], off offset:48 -; GFX10-GISEL-NEXT: global_load_dwordx4 v[20:23], v[0:1], off offset:64 -; GFX10-GISEL-NEXT: global_load_dwordx2 v[24:25], v[0:1], off offset:80 -; GFX10-GISEL-NEXT: s_waitcnt vmcnt(5) -; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off -; GFX10-GISEL-NEXT: s_waitcnt vmcnt(4) -; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16 -; GFX10-GISEL-NEXT: s_waitcnt vmcnt(3) -; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:32 -; GFX10-GISEL-NEXT: s_waitcnt vmcnt(2) -; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[16:19], off offset:48 -; GFX10-GISEL-NEXT: s_waitcnt vmcnt(1) -; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[20:23], off offset:64 -; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) -; GFX10-GISEL-NEXT: global_store_dwordx2 v[2:3], v[24:25], off offset:80 -; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-SDAG-LABEL: freeze_v22i32: -; GFX11-SDAG: ; %bb.0: -; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-NEXT: s_clause 0x5 -; GFX11-SDAG-NEXT: global_load_b128 v[4:7], v[0:1], off offset:64 -; GFX11-SDAG-NEXT: global_load_b64 v[24:25], v[0:1], off offset:80 -; GFX11-SDAG-NEXT: global_load_b128 v[8:11], v[0:1], off offset:32 -; GFX11-SDAG-NEXT: global_load_b128 v[12:15], v[0:1], off offset:48 -; GFX11-SDAG-NEXT: global_load_b128 v[16:19], v[0:1], off -; GFX11-SDAG-NEXT: global_load_b128 v[20:23], v[0:1], off offset:16 -; GFX11-SDAG-NEXT: s_waitcnt vmcnt(5) -; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[4:7], off offset:64 -; GFX11-SDAG-NEXT: s_waitcnt vmcnt(4) -; GFX11-SDAG-NEXT: global_store_b64 v[2:3], v[24:25], off offset:80 -; GFX11-SDAG-NEXT: s_waitcnt vmcnt(3) -; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[8:11], off offset:32 -; GFX11-SDAG-NEXT: s_waitcnt vmcnt(2) -; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[12:15], off offset:48 -; GFX11-SDAG-NEXT: s_waitcnt vmcnt(1) -; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[16:19], off -; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) -; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[20:23], off offset:16 -; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-GISEL-LABEL: freeze_v22i32: -; GFX11-GISEL: ; %bb.0: -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: s_clause 0x5 -; GFX11-GISEL-NEXT: global_load_b128 v[4:7], v[0:1], off -; GFX11-GISEL-NEXT: global_load_b128 v[8:11], v[0:1], off offset:16 -; GFX11-GISEL-NEXT: global_load_b128 v[12:15], v[0:1], off offset:32 -; GFX11-GISEL-NEXT: global_load_b128 v[16:19], v[0:1], off offset:48 -; GFX11-GISEL-NEXT: global_load_b128 v[20:23], v[0:1], off offset:64 -; GFX11-GISEL-NEXT: global_load_b64 v[0:1], v[0:1], off offset:80 -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(5) -; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[4:7], off -; GFX11-GISEL-NEXT: s_waitcnt 
vmcnt(4) -; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[8:11], off offset:16 -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(3) -; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[12:15], off offset:32 -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(2) -; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[16:19], off offset:48 -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(1) -; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[20:23], off offset:64 -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) -; GFX11-GISEL-NEXT: global_store_b64 v[2:3], v[0:1], off offset:80 -; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] - %a = load <22 x i32>, ptr addrspace(1) %ptra, align 4 - %freeze = freeze <22 x i32> %a - store <22 x i32> %freeze, ptr addrspace(1) %ptrb, align 4 - ret void -} - -define void @freeze_v30i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { -; GFX10-SDAG-LABEL: freeze_v30i32: -; GFX10-SDAG: ; %bb.0: -; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SDAG-NEXT: s_clause 0x7 -; GFX10-SDAG-NEXT: global_load_dwordx4 v[4:7], v[0:1], off offset:96 -; GFX10-SDAG-NEXT: global_load_dwordx2 v[32:33], v[0:1], off offset:112 -; GFX10-SDAG-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:64 -; GFX10-SDAG-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:80 -; GFX10-SDAG-NEXT: global_load_dwordx4 v[16:19], v[0:1], off offset:32 -; GFX10-SDAG-NEXT: global_load_dwordx4 v[20:23], v[0:1], off offset:48 -; GFX10-SDAG-NEXT: global_load_dwordx4 v[24:27], v[0:1], off -; GFX10-SDAG-NEXT: global_load_dwordx4 v[28:31], v[0:1], off offset:16 -; GFX10-SDAG-NEXT: s_waitcnt vmcnt(7) -; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[4:7], off offset:96 -; GFX10-SDAG-NEXT: s_waitcnt vmcnt(6) -; GFX10-SDAG-NEXT: global_store_dwordx2 v[2:3], v[32:33], off offset:112 -; GFX10-SDAG-NEXT: s_waitcnt vmcnt(5) -; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:64 -; GFX10-SDAG-NEXT: s_waitcnt vmcnt(4) -; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:80 -; GFX10-SDAG-NEXT: s_waitcnt vmcnt(3) -; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[16:19], off offset:32 -; GFX10-SDAG-NEXT: s_waitcnt vmcnt(2) -; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[20:23], off offset:48 -; GFX10-SDAG-NEXT: s_waitcnt vmcnt(1) -; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[24:27], off -; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) -; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[28:31], off offset:16 -; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GFX10-GISEL-LABEL: freeze_v30i32: -; GFX10-GISEL: ; %bb.0: -; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: s_clause 0x7 -; GFX10-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off -; GFX10-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16 -; GFX10-GISEL-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:32 -; GFX10-GISEL-NEXT: global_load_dwordx4 v[16:19], v[0:1], off offset:48 -; GFX10-GISEL-NEXT: global_load_dwordx4 v[20:23], v[0:1], off offset:64 -; GFX10-GISEL-NEXT: global_load_dwordx4 v[24:27], v[0:1], off offset:80 -; GFX10-GISEL-NEXT: global_load_dwordx4 v[28:31], v[0:1], off offset:96 -; GFX10-GISEL-NEXT: global_load_dwordx2 v[32:33], v[0:1], off offset:112 -; GFX10-GISEL-NEXT: s_waitcnt vmcnt(7) -; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off -; GFX10-GISEL-NEXT: s_waitcnt vmcnt(6) -; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16 -; GFX10-GISEL-NEXT: s_waitcnt vmcnt(5) -; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:32 -; GFX10-GISEL-NEXT: s_waitcnt vmcnt(4) -; 
GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[16:19], off offset:48 -; GFX10-GISEL-NEXT: s_waitcnt vmcnt(3) -; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[20:23], off offset:64 -; GFX10-GISEL-NEXT: s_waitcnt vmcnt(2) -; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[24:27], off offset:80 -; GFX10-GISEL-NEXT: s_waitcnt vmcnt(1) -; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[28:31], off offset:96 -; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) -; GFX10-GISEL-NEXT: global_store_dwordx2 v[2:3], v[32:33], off offset:112 -; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-SDAG-LABEL: freeze_v30i32: -; GFX11-SDAG: ; %bb.0: -; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-NEXT: s_clause 0x7 -; GFX11-SDAG-NEXT: global_load_b128 v[4:7], v[0:1], off offset:96 -; GFX11-SDAG-NEXT: global_load_b64 v[32:33], v[0:1], off offset:112 -; GFX11-SDAG-NEXT: global_load_b128 v[8:11], v[0:1], off offset:64 -; GFX11-SDAG-NEXT: global_load_b128 v[12:15], v[0:1], off offset:80 -; GFX11-SDAG-NEXT: global_load_b128 v[16:19], v[0:1], off offset:32 -; GFX11-SDAG-NEXT: global_load_b128 v[20:23], v[0:1], off offset:48 -; GFX11-SDAG-NEXT: global_load_b128 v[24:27], v[0:1], off -; GFX11-SDAG-NEXT: global_load_b128 v[28:31], v[0:1], off offset:16 -; GFX11-SDAG-NEXT: s_waitcnt vmcnt(7) -; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[4:7], off offset:96 -; GFX11-SDAG-NEXT: s_waitcnt vmcnt(6) -; GFX11-SDAG-NEXT: global_store_b64 v[2:3], v[32:33], off offset:112 -; GFX11-SDAG-NEXT: s_waitcnt vmcnt(5) -; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[8:11], off offset:64 -; GFX11-SDAG-NEXT: s_waitcnt vmcnt(4) -; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[12:15], off offset:80 -; GFX11-SDAG-NEXT: s_waitcnt vmcnt(3) -; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[16:19], off offset:32 -; GFX11-SDAG-NEXT: s_waitcnt vmcnt(2) -; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[20:23], off offset:48 -; GFX11-SDAG-NEXT: s_waitcnt vmcnt(1) -; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[24:27], off -; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) -; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[28:31], off offset:16 -; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-GISEL-LABEL: freeze_v30i32: -; GFX11-GISEL: ; %bb.0: -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: s_clause 0x7 -; GFX11-GISEL-NEXT: global_load_b128 v[4:7], v[0:1], off -; GFX11-GISEL-NEXT: global_load_b128 v[8:11], v[0:1], off offset:16 -; GFX11-GISEL-NEXT: global_load_b128 v[12:15], v[0:1], off offset:32 -; GFX11-GISEL-NEXT: global_load_b128 v[16:19], v[0:1], off offset:48 -; GFX11-GISEL-NEXT: global_load_b128 v[20:23], v[0:1], off offset:64 -; GFX11-GISEL-NEXT: global_load_b128 v[24:27], v[0:1], off offset:80 -; GFX11-GISEL-NEXT: global_load_b128 v[28:31], v[0:1], off offset:96 -; GFX11-GISEL-NEXT: global_load_b64 v[0:1], v[0:1], off offset:112 -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(7) -; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[4:7], off -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(6) -; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[8:11], off offset:16 -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(5) -; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[12:15], off offset:32 -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(4) -; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[16:19], off offset:48 -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(3) -; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[20:23], off offset:64 -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(2) -; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[24:27], off offset:80 -; GFX11-GISEL-NEXT: s_waitcnt 
vmcnt(1) -; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[28:31], off offset:96 -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) -; GFX11-GISEL-NEXT: global_store_b64 v[2:3], v[0:1], off offset:112 -; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] - %a = load <30 x i32>, ptr addrspace(1) %ptra, align 4 - %freeze = freeze <30 x i32> %a - store <30 x i32> %freeze, ptr addrspace(1) %ptrb, align 4 - ret void -} - -define void @freeze_v31i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { -; GFX10-SDAG-LABEL: freeze_v31i32: -; GFX10-SDAG: ; %bb.0: -; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SDAG-NEXT: s_clause 0x7 -; GFX10-SDAG-NEXT: global_load_dwordx4 v[4:7], v[0:1], off offset:96 -; GFX10-SDAG-NEXT: global_load_dwordx3 v[32:34], v[0:1], off offset:112 -; GFX10-SDAG-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:64 -; GFX10-SDAG-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:80 -; GFX10-SDAG-NEXT: global_load_dwordx4 v[16:19], v[0:1], off offset:32 -; GFX10-SDAG-NEXT: global_load_dwordx4 v[20:23], v[0:1], off offset:48 -; GFX10-SDAG-NEXT: global_load_dwordx4 v[24:27], v[0:1], off -; GFX10-SDAG-NEXT: global_load_dwordx4 v[28:31], v[0:1], off offset:16 -; GFX10-SDAG-NEXT: s_waitcnt vmcnt(7) -; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[4:7], off offset:96 -; GFX10-SDAG-NEXT: s_waitcnt vmcnt(6) -; GFX10-SDAG-NEXT: global_store_dwordx3 v[2:3], v[32:34], off offset:112 -; GFX10-SDAG-NEXT: s_waitcnt vmcnt(5) -; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:64 -; GFX10-SDAG-NEXT: s_waitcnt vmcnt(4) -; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:80 -; GFX10-SDAG-NEXT: s_waitcnt vmcnt(3) -; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[16:19], off offset:32 -; GFX10-SDAG-NEXT: s_waitcnt vmcnt(2) -; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[20:23], off offset:48 -; GFX10-SDAG-NEXT: s_waitcnt vmcnt(1) -; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[24:27], off -; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) -; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[28:31], off offset:16 -; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GFX10-GISEL-LABEL: freeze_v31i32: -; GFX10-GISEL: ; %bb.0: -; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: s_clause 0x7 -; GFX10-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off -; GFX10-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16 -; GFX10-GISEL-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:32 -; GFX10-GISEL-NEXT: global_load_dwordx4 v[16:19], v[0:1], off offset:48 -; GFX10-GISEL-NEXT: global_load_dwordx4 v[20:23], v[0:1], off offset:64 -; GFX10-GISEL-NEXT: global_load_dwordx4 v[24:27], v[0:1], off offset:80 -; GFX10-GISEL-NEXT: global_load_dwordx4 v[28:31], v[0:1], off offset:96 -; GFX10-GISEL-NEXT: global_load_dwordx3 v[32:34], v[0:1], off offset:112 -; GFX10-GISEL-NEXT: s_waitcnt vmcnt(7) -; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off -; GFX10-GISEL-NEXT: s_waitcnt vmcnt(6) -; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16 -; GFX10-GISEL-NEXT: s_waitcnt vmcnt(5) -; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:32 -; GFX10-GISEL-NEXT: s_waitcnt vmcnt(4) -; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[16:19], off offset:48 -; GFX10-GISEL-NEXT: s_waitcnt vmcnt(3) -; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[20:23], off offset:64 -; GFX10-GISEL-NEXT: s_waitcnt vmcnt(2) -; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[24:27], off offset:80 -; GFX10-GISEL-NEXT: s_waitcnt 
vmcnt(1) -; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[28:31], off offset:96 -; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) -; GFX10-GISEL-NEXT: global_store_dwordx3 v[2:3], v[32:34], off offset:112 -; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-SDAG-LABEL: freeze_v31i32: -; GFX11-SDAG: ; %bb.0: -; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-NEXT: s_clause 0x7 -; GFX11-SDAG-NEXT: global_load_b128 v[4:7], v[0:1], off offset:96 -; GFX11-SDAG-NEXT: global_load_b96 v[32:34], v[0:1], off offset:112 -; GFX11-SDAG-NEXT: global_load_b128 v[8:11], v[0:1], off offset:64 -; GFX11-SDAG-NEXT: global_load_b128 v[12:15], v[0:1], off offset:80 -; GFX11-SDAG-NEXT: global_load_b128 v[16:19], v[0:1], off offset:32 -; GFX11-SDAG-NEXT: global_load_b128 v[20:23], v[0:1], off offset:48 -; GFX11-SDAG-NEXT: global_load_b128 v[24:27], v[0:1], off -; GFX11-SDAG-NEXT: global_load_b128 v[28:31], v[0:1], off offset:16 -; GFX11-SDAG-NEXT: s_waitcnt vmcnt(7) -; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[4:7], off offset:96 -; GFX11-SDAG-NEXT: s_waitcnt vmcnt(6) -; GFX11-SDAG-NEXT: global_store_b96 v[2:3], v[32:34], off offset:112 -; GFX11-SDAG-NEXT: s_waitcnt vmcnt(5) -; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[8:11], off offset:64 -; GFX11-SDAG-NEXT: s_waitcnt vmcnt(4) -; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[12:15], off offset:80 -; GFX11-SDAG-NEXT: s_waitcnt vmcnt(3) -; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[16:19], off offset:32 -; GFX11-SDAG-NEXT: s_waitcnt vmcnt(2) -; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[20:23], off offset:48 -; GFX11-SDAG-NEXT: s_waitcnt vmcnt(1) -; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[24:27], off -; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) -; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[28:31], off offset:16 -; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-GISEL-LABEL: freeze_v31i32: -; GFX11-GISEL: ; %bb.0: -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: s_clause 0x7 -; GFX11-GISEL-NEXT: global_load_b128 v[4:7], v[0:1], off -; GFX11-GISEL-NEXT: global_load_b128 v[8:11], v[0:1], off offset:16 -; GFX11-GISEL-NEXT: global_load_b128 v[12:15], v[0:1], off offset:32 -; GFX11-GISEL-NEXT: global_load_b128 v[16:19], v[0:1], off offset:48 -; GFX11-GISEL-NEXT: global_load_b128 v[20:23], v[0:1], off offset:64 -; GFX11-GISEL-NEXT: global_load_b128 v[24:27], v[0:1], off offset:80 -; GFX11-GISEL-NEXT: global_load_b128 v[28:31], v[0:1], off offset:96 -; GFX11-GISEL-NEXT: global_load_b96 v[32:34], v[0:1], off offset:112 -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(7) -; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[4:7], off -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(6) -; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[8:11], off offset:16 -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(5) -; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[12:15], off offset:32 -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(4) -; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[16:19], off offset:48 -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(3) -; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[20:23], off offset:64 -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(2) -; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[24:27], off offset:80 -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(1) -; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[28:31], off offset:96 -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) -; GFX11-GISEL-NEXT: global_store_b96 v[2:3], v[32:34], off offset:112 -; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] - %a = load <31 x i32>, ptr addrspace(1) %ptra, align 4 - %freeze = freeze <31 x i32> %a - 
store <31 x i32> %freeze, ptr addrspace(1) %ptrb, align 4 - ret void -} - -define void @freeze_v32i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { -; GFX10-SDAG-LABEL: freeze_v32i32: -; GFX10-SDAG: ; %bb.0: -; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SDAG-NEXT: s_clause 0x7 -; GFX10-SDAG-NEXT: global_load_dwordx4 v[4:7], v[0:1], off offset:96 -; GFX10-SDAG-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:112 -; GFX10-SDAG-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:64 -; GFX10-SDAG-NEXT: global_load_dwordx4 v[16:19], v[0:1], off offset:80 -; GFX10-SDAG-NEXT: global_load_dwordx4 v[20:23], v[0:1], off offset:32 -; GFX10-SDAG-NEXT: global_load_dwordx4 v[24:27], v[0:1], off offset:48 -; GFX10-SDAG-NEXT: global_load_dwordx4 v[28:31], v[0:1], off -; GFX10-SDAG-NEXT: global_load_dwordx4 v[32:35], v[0:1], off offset:16 -; GFX10-SDAG-NEXT: s_waitcnt vmcnt(7) -; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[4:7], off offset:96 -; GFX10-SDAG-NEXT: s_waitcnt vmcnt(6) -; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:112 -; GFX10-SDAG-NEXT: s_waitcnt vmcnt(5) -; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:64 -; GFX10-SDAG-NEXT: s_waitcnt vmcnt(4) -; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[16:19], off offset:80 -; GFX10-SDAG-NEXT: s_waitcnt vmcnt(3) -; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[20:23], off offset:32 -; GFX10-SDAG-NEXT: s_waitcnt vmcnt(2) -; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[24:27], off offset:48 -; GFX10-SDAG-NEXT: s_waitcnt vmcnt(1) -; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[28:31], off -; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) -; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[32:35], off offset:16 -; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GFX10-GISEL-LABEL: freeze_v32i32: -; GFX10-GISEL: ; %bb.0: -; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: s_clause 0x7 -; GFX10-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off -; GFX10-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16 -; GFX10-GISEL-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:32 -; GFX10-GISEL-NEXT: global_load_dwordx4 v[16:19], v[0:1], off offset:48 -; GFX10-GISEL-NEXT: global_load_dwordx4 v[20:23], v[0:1], off offset:64 -; GFX10-GISEL-NEXT: global_load_dwordx4 v[24:27], v[0:1], off offset:80 -; GFX10-GISEL-NEXT: global_load_dwordx4 v[28:31], v[0:1], off offset:96 -; GFX10-GISEL-NEXT: global_load_dwordx4 v[32:35], v[0:1], off offset:112 -; GFX10-GISEL-NEXT: s_waitcnt vmcnt(7) -; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off -; GFX10-GISEL-NEXT: s_waitcnt vmcnt(6) -; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16 -; GFX10-GISEL-NEXT: s_waitcnt vmcnt(5) -; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:32 -; GFX10-GISEL-NEXT: s_waitcnt vmcnt(4) -; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[16:19], off offset:48 -; GFX10-GISEL-NEXT: s_waitcnt vmcnt(3) -; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[20:23], off offset:64 -; GFX10-GISEL-NEXT: s_waitcnt vmcnt(2) -; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[24:27], off offset:80 -; GFX10-GISEL-NEXT: s_waitcnt vmcnt(1) -; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[28:31], off offset:96 -; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) -; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[32:35], off offset:112 -; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-SDAG-LABEL: freeze_v32i32: -; GFX11-SDAG: ; %bb.0: -; GFX11-SDAG-NEXT: 
s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-NEXT: s_clause 0x7 -; GFX11-SDAG-NEXT: global_load_b128 v[4:7], v[0:1], off offset:96 -; GFX11-SDAG-NEXT: global_load_b128 v[8:11], v[0:1], off offset:112 -; GFX11-SDAG-NEXT: global_load_b128 v[12:15], v[0:1], off offset:64 -; GFX11-SDAG-NEXT: global_load_b128 v[16:19], v[0:1], off offset:80 -; GFX11-SDAG-NEXT: global_load_b128 v[20:23], v[0:1], off offset:32 -; GFX11-SDAG-NEXT: global_load_b128 v[24:27], v[0:1], off offset:48 -; GFX11-SDAG-NEXT: global_load_b128 v[28:31], v[0:1], off -; GFX11-SDAG-NEXT: global_load_b128 v[32:35], v[0:1], off offset:16 -; GFX11-SDAG-NEXT: s_waitcnt vmcnt(7) -; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[4:7], off offset:96 -; GFX11-SDAG-NEXT: s_waitcnt vmcnt(6) -; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[8:11], off offset:112 -; GFX11-SDAG-NEXT: s_waitcnt vmcnt(5) -; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[12:15], off offset:64 -; GFX11-SDAG-NEXT: s_waitcnt vmcnt(4) -; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[16:19], off offset:80 -; GFX11-SDAG-NEXT: s_waitcnt vmcnt(3) -; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[20:23], off offset:32 -; GFX11-SDAG-NEXT: s_waitcnt vmcnt(2) -; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[24:27], off offset:48 -; GFX11-SDAG-NEXT: s_waitcnt vmcnt(1) -; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[28:31], off -; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) -; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[32:35], off offset:16 -; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-GISEL-LABEL: freeze_v32i32: -; GFX11-GISEL: ; %bb.0: -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: s_clause 0x7 -; GFX11-GISEL-NEXT: global_load_b128 v[4:7], v[0:1], off -; GFX11-GISEL-NEXT: global_load_b128 v[8:11], v[0:1], off offset:16 -; GFX11-GISEL-NEXT: global_load_b128 v[12:15], v[0:1], off offset:32 -; GFX11-GISEL-NEXT: global_load_b128 v[16:19], v[0:1], off offset:48 -; GFX11-GISEL-NEXT: global_load_b128 v[20:23], v[0:1], off offset:64 -; GFX11-GISEL-NEXT: global_load_b128 v[24:27], v[0:1], off offset:80 -; GFX11-GISEL-NEXT: global_load_b128 v[28:31], v[0:1], off offset:96 -; GFX11-GISEL-NEXT: global_load_b128 v[32:35], v[0:1], off offset:112 -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(7) -; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[4:7], off -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(6) -; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[8:11], off offset:16 -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(5) -; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[12:15], off offset:32 -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(4) -; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[16:19], off offset:48 -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(3) -; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[20:23], off offset:64 -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(2) -; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[24:27], off offset:80 -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(1) -; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[28:31], off offset:96 -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) -; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[32:35], off offset:112 -; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] - %a = load <32 x i32>, ptr addrspace(1) %ptra, align 4 - %freeze = freeze <32 x i32> %a - store <32 x i32> %freeze, ptr addrspace(1) %ptrb, align 4 - ret void -} - -define void @freeze_i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { -; GFX10-LABEL: freeze_i32: -; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: global_load_dword v0, v[0:1], off -; GFX10-NEXT: 
s_waitcnt vmcnt(0) -; GFX10-NEXT: global_store_dword v[2:3], v0, off -; GFX10-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-LABEL: freeze_i32: -; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: global_load_b32 v0, v[0:1], off -; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: global_store_b32 v[2:3], v0, off -; GFX11-NEXT: s_setpc_b64 s[30:31] - %a = load i32, ptr addrspace(1) %ptra, align 4 - %freeze = freeze i32 %a - store i32 %freeze, ptr addrspace(1) %ptrb, align 4 - ret void -} - -define void @freeze_i64(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { -; GFX10-LABEL: freeze_i64: -; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: global_load_dwordx2 v[0:1], v[0:1], off -; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: global_store_dwordx2 v[2:3], v[0:1], off -; GFX10-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-LABEL: freeze_i64: -; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: global_load_b64 v[0:1], v[0:1], off -; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: global_store_b64 v[2:3], v[0:1], off -; GFX11-NEXT: s_setpc_b64 s[30:31] - %a = load i64, ptr addrspace(1) %ptra, align 4 - %freeze = freeze i64 %a - store i64 %freeze, ptr addrspace(1) %ptrb, align 4 - ret void -} - -define void @freeze_float(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { -; GFX10-LABEL: freeze_float: -; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: global_load_dword v0, v[0:1], off -; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: global_store_dword v[2:3], v0, off -; GFX10-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-LABEL: freeze_float: -; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: global_load_b32 v0, v[0:1], off -; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: global_store_b32 v[2:3], v0, off -; GFX11-NEXT: s_setpc_b64 s[30:31] - %a = load float, ptr addrspace(1) %ptra, align 4 - %freeze = freeze float %a - store float %freeze, ptr addrspace(1) %ptrb, align 4 - ret void -} - -define void @freeze_i128(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { -; GFX10-LABEL: freeze_i128: -; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: global_load_dwordx4 v[4:7], v[0:1], off -; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: global_store_dwordx4 v[2:3], v[4:7], off -; GFX10-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-LABEL: freeze_i128: -; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: global_load_b128 v[4:7], v[0:1], off -; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: global_store_b128 v[2:3], v[4:7], off -; GFX11-NEXT: s_setpc_b64 s[30:31] - %a = load i128, ptr addrspace(1) %ptra, align 4 - %freeze = freeze i128 %a - store i128 %freeze, ptr addrspace(1) %ptrb, align 4 - ret void -} - -define void @freeze_i256(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { -; GFX10-SDAG-LABEL: freeze_i256: -; GFX10-SDAG: ; %bb.0: -; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SDAG-NEXT: s_clause 0x1 -; GFX10-SDAG-NEXT: global_load_dwordx4 v[4:7], v[0:1], off offset:16 -; GFX10-SDAG-NEXT: global_load_dwordx4 v[8:11], v[0:1], off -; GFX10-SDAG-NEXT: s_waitcnt vmcnt(1) -; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[4:7], off offset:16 -; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) -; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[8:11], off -; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GFX10-GISEL-LABEL: freeze_i256: -; GFX10-GISEL: ; %bb.0: -; GFX10-GISEL-NEXT: 
s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT: s_clause 0x1
-; GFX10-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off
-; GFX10-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16
-; GFX10-GISEL-NEXT: s_waitcnt vmcnt(1)
-; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off
-; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
-; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16
-; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SDAG-LABEL: freeze_i256:
-; GFX11-SDAG: ; %bb.0:
-; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SDAG-NEXT: s_clause 0x1
-; GFX11-SDAG-NEXT: global_load_b128 v[4:7], v[0:1], off offset:16
-; GFX11-SDAG-NEXT: global_load_b128 v[8:11], v[0:1], off
-; GFX11-SDAG-NEXT: s_waitcnt vmcnt(1)
-; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[4:7], off offset:16
-; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
-; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[8:11], off
-; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-GISEL-LABEL: freeze_i256:
-; GFX11-GISEL: ; %bb.0:
-; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-GISEL-NEXT: s_clause 0x1
-; GFX11-GISEL-NEXT: global_load_b128 v[4:7], v[0:1], off
-; GFX11-GISEL-NEXT: global_load_b128 v[8:11], v[0:1], off offset:16
-; GFX11-GISEL-NEXT: s_waitcnt vmcnt(1)
-; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[4:7], off
-; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
-; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[8:11], off offset:16
-; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
-  %a = load i256, ptr addrspace(1) %ptra, align 4
-  %freeze = freeze i256 %a
-  store i256 %freeze, ptr addrspace(1) %ptrb, align 4
-  ret void
-}

From 642bfd89f94f90bc9696a81e72333f6eb1ce5f20 Mon Sep 17 00:00:00 2001
From: Yingwei Zheng
Date: Wed, 25 Sep 2024 09:15:17 +0800
Subject: [PATCH 074/212] [Clang][compiler-rt][UBSan] Improve
 `__ubsan_handle_invalid_builtin` (#109088)

This patch improves the error message and fixes a copy-paste mistake in
the GET_REPORT_OPTIONS argument. It addresses the review comment at
https://github.com/llvm/llvm-project/pull/104741#discussion_r1764323722.

---------

Co-authored-by: Vitaly Buka
---
 compiler-rt/lib/ubsan/ubsan_handlers.cpp       |  6 +++---
 .../TestCases/Integer/suppressions-builtin.cpp | 18 ++++++++++++++++++
 .../test/ubsan/TestCases/Misc/builtins.cpp     | 14 +++++++-------
 3 files changed, 28 insertions(+), 10 deletions(-)
 create mode 100644 compiler-rt/test/ubsan/TestCases/Integer/suppressions-builtin.cpp

diff --git a/compiler-rt/lib/ubsan/ubsan_handlers.cpp b/compiler-rt/lib/ubsan/ubsan_handlers.cpp
index 27d01653f088d..9dbe8e6c0c174 100644
--- a/compiler-rt/lib/ubsan/ubsan_handlers.cpp
+++ b/compiler-rt/lib/ubsan/ubsan_handlers.cpp
@@ -634,12 +634,12 @@ static void handleInvalidBuiltin(InvalidBuiltinData *Data, ReportOptions Opts) {
   ScopedReport R(Opts, Loc, ET);

   Diag(Loc, DL_Error, ET,
-       "passing zero to %0, which is not a valid argument")
-      << ((Data->Kind == BCK_CTZPassedZero) ? "ctz()" : "clz()");
+       "passing zero to __builtin_%0(), which is not a valid argument")
+      << ((Data->Kind == BCK_CTZPassedZero) ? "ctz" : "clz");
"ctz" : "clz"); } void __ubsan::__ubsan_handle_invalid_builtin(InvalidBuiltinData *Data) { - GET_REPORT_OPTIONS(true); + GET_REPORT_OPTIONS(false); handleInvalidBuiltin(Data, Opts); } void __ubsan::__ubsan_handle_invalid_builtin_abort(InvalidBuiltinData *Data) { diff --git a/compiler-rt/test/ubsan/TestCases/Integer/suppressions-builtin.cpp b/compiler-rt/test/ubsan/TestCases/Integer/suppressions-builtin.cpp new file mode 100644 index 0000000000000..60377c492e8cc --- /dev/null +++ b/compiler-rt/test/ubsan/TestCases/Integer/suppressions-builtin.cpp @@ -0,0 +1,18 @@ +// RUN: %clangxx -fsanitize=builtin -g0 %s -o %t + +// Suppression by symbol name requires the compiler-rt runtime to be able to +// symbolize stack addresses. +// REQUIRES: can-symbolize +// UNSUPPORTED: android + +// RUN: echo "invalid-builtin-use:do_ctz" > %t.func-supp +// RUN: %env_ubsan_opts=halt_on_error=1:suppressions='"%t.func-supp"' %run %t + +#include + +extern "C" void do_ctz(int n) { __builtin_ctz(0); } + +int main() { + do_ctz(0); + return 0; +} diff --git a/compiler-rt/test/ubsan/TestCases/Misc/builtins.cpp b/compiler-rt/test/ubsan/TestCases/Misc/builtins.cpp index f8f564cb7baae..a635f7fcc686e 100644 --- a/compiler-rt/test/ubsan/TestCases/Misc/builtins.cpp +++ b/compiler-rt/test/ubsan/TestCases/Misc/builtins.cpp @@ -6,25 +6,25 @@ // RUN: not %run %t.abort 2>&1 | FileCheck %s --check-prefix=ABORT void check_ctz(int n) { - // ABORT: builtins.cpp:[[@LINE+2]]:17: runtime error: passing zero to ctz(), which is not a valid argument - // RECOVER: builtins.cpp:[[@LINE+1]]:17: runtime error: passing zero to ctz(), which is not a valid argument + // ABORT: builtins.cpp:[[@LINE+2]]:17: runtime error: passing zero to __builtin_ctz(), which is not a valid argument + // RECOVER: builtins.cpp:[[@LINE+1]]:17: runtime error: passing zero to __builtin_ctz(), which is not a valid argument __builtin_ctz(n); - // RECOVER: builtins.cpp:[[@LINE+1]]:18: runtime error: passing zero to ctz(), which is not a valid argument + // RECOVER: builtins.cpp:[[@LINE+1]]:18: runtime error: passing zero to __builtin_ctz(), which is not a valid argument __builtin_ctzl(n); - // RECOVER: builtins.cpp:[[@LINE+1]]:19: runtime error: passing zero to ctz(), which is not a valid argument + // RECOVER: builtins.cpp:[[@LINE+1]]:19: runtime error: passing zero to __builtin_ctz(), which is not a valid argument __builtin_ctzll(n); } void check_clz(int n) { - // RECOVER: builtins.cpp:[[@LINE+1]]:17: runtime error: passing zero to clz(), which is not a valid argument + // RECOVER: builtins.cpp:[[@LINE+1]]:17: runtime error: passing zero to __builtin_clz(), which is not a valid argument __builtin_clz(n); - // RECOVER: builtins.cpp:[[@LINE+1]]:18: runtime error: passing zero to clz(), which is not a valid argument + // RECOVER: builtins.cpp:[[@LINE+1]]:18: runtime error: passing zero to __builtin_clz(), which is not a valid argument __builtin_clzl(n); - // RECOVER: builtins.cpp:[[@LINE+1]]:19: runtime error: passing zero to clz(), which is not a valid argument + // RECOVER: builtins.cpp:[[@LINE+1]]:19: runtime error: passing zero to __builtin_clz(), which is not a valid argument __builtin_clzll(n); } From 3da5e82e31712792411945b655929a1680fb476c Mon Sep 17 00:00:00 2001 From: duk <74797529+duk-37@users.noreply.github.com> Date: Tue, 24 Sep 2024 21:44:59 -0400 Subject: [PATCH 075/212] [NVPTX] Fix NVPTXLowerUnreachable::isLoweredToTrap logic (#109730) Previously, this pass would not generate traps if `NoTrapAfterNoreturn` was set and would generate traps even if the 
instruction directly before the `UnreachableInst` was `llvm.trap()`.

Fix both of these problems and add some tests.
---
 .../Target/NVPTX/NVPTXLowerUnreachable.cpp | 21 ++++++++++-----
 llvm/test/CodeGen/NVPTX/unreachable.ll     | 26 +++++++++++++++++----
 2 files changed, 35 insertions(+), 12 deletions(-)

diff --git a/llvm/lib/Target/NVPTX/NVPTXLowerUnreachable.cpp b/llvm/lib/Target/NVPTX/NVPTXLowerUnreachable.cpp
index 92b90e2559154..a289d35f9b3f1 100644
--- a/llvm/lib/Target/NVPTX/NVPTXLowerUnreachable.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXLowerUnreachable.cpp
@@ -110,17 +110,24 @@ StringRef NVPTXLowerUnreachable::getPassName() const {
 }

 // =============================================================================
-// Returns whether a `trap` intrinsic should be emitted before I.
+// Returns whether a `trap` intrinsic would be emitted before I.
 //
 // This is a copy of the logic in SelectionDAGBuilder::visitUnreachable().
 // =============================================================================
 bool NVPTXLowerUnreachable::isLoweredToTrap(const UnreachableInst &I) const {
-  if (!TrapUnreachable)
-    return false;
-  if (!NoTrapAfterNoreturn)
-    return true;
-  const CallInst *Call = dyn_cast_or_null<CallInst>(I.getPrevNode());
-  return Call && Call->doesNotReturn();
+  if (const auto *Call = dyn_cast_or_null<CallInst>(I.getPrevNode())) {
+    // We've already emitted a non-continuable trap.
+    if (Call->isNonContinuableTrap())
+      return true;
+
+    // No traps are emitted for calls that do not return
+    // when this option is enabled.
+    if (NoTrapAfterNoreturn && Call->doesNotReturn())
+      return false;
+  }
+
+  // In all other cases, we will generate a trap if TrapUnreachable is set.
+  return TrapUnreachable;
 }

 // =============================================================================
diff --git a/llvm/test/CodeGen/NVPTX/unreachable.ll b/llvm/test/CodeGen/NVPTX/unreachable.ll
index 011497c4e2340..f9118900cb737 100644
--- a/llvm/test/CodeGen/NVPTX/unreachable.ll
+++ b/llvm/test/CodeGen/NVPTX/unreachable.ll
@@ -1,18 +1,23 @@
-; RUN: llc < %s -march=nvptx -mcpu=sm_20 -verify-machineinstrs \
+; RUN: llc < %s -march=nvptx -mcpu=sm_20 -verify-machineinstrs -trap-unreachable=false \
 ; RUN: | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-NOTRAP
-; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 -verify-machineinstrs \
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 -verify-machineinstrs -trap-unreachable=false \
 ; RUN: | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-NOTRAP
-; RUN: llc < %s -march=nvptx -mcpu=sm_20 -verify-machineinstrs -trap-unreachable \
+; RUN: llc < %s -march=nvptx -mcpu=sm_20 -verify-machineinstrs -trap-unreachable -no-trap-after-noreturn \
+; RUN: | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-NOTRAP
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 -verify-machineinstrs -trap-unreachable -no-trap-after-noreturn \
+; RUN: | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-NOTRAP
+; RUN: llc < %s -march=nvptx -mcpu=sm_20 -verify-machineinstrs -trap-unreachable -no-trap-after-noreturn=false \
 ; RUN: | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-TRAP
-; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 -verify-machineinstrs -trap-unreachable \
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 -verify-machineinstrs -trap-unreachable -no-trap-after-noreturn=false \
 ; RUN: | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-TRAP
 ; RUN: %if ptxas && !ptxas-12.0 %{ llc < %s -march=nvptx -mcpu=sm_20 -verify-machineinstrs | %ptxas-verify %}
 ; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 -verify-machineinstrs | %ptxas-verify %}

 ; CHECK: .extern .func throw
 declare void @throw() #0
+declare void @llvm.trap() #0

-; CHECK: .entry kernel_func
+; CHECK-LABEL: .entry kernel_func
 define void @kernel_func() {
 ; CHECK: call.uni
 ; CHECK: throw,
@@ -24,6 +29,17 @@ define void @kernel_func() {
   unreachable
 }

+; CHECK-LABEL: kernel_func_2
+define void @kernel_func_2() {
+; CHECK: trap; exit;
+  call void @llvm.trap()
+
+;; Make sure we avoid emitting two trap instructions.
+; CHECK-NOT: trap;
+; CHECK-NOT: exit;
+  unreachable
+}
+
 attributes #0 = { noreturn }

From 0a42c7c6679bcc6f7be4b3d103670197acac96a9 Mon Sep 17 00:00:00 2001
From: Matheus Izvekov
Date: Tue, 24 Sep 2024 22:57:07 -0300
Subject: [PATCH 076/212] [clang] fix assert in ADL finding entity in the
 implicit global module (#109882)

This extends the assert to cover the implicit global module case in
addition to module purview.

Fixes #109879
---
 clang/docs/ReleaseNotes.rst       |  7 +++++--
 clang/lib/Sema/SemaLookup.cpp     |  5 +++--
 clang/test/Modules/GH109879-1.cpp | 25 +++++++++++++++++++++++++
 clang/test/Modules/GH109879-2.cpp | 29 +++++++++++++++++++++++++++++
 4 files changed, 62 insertions(+), 4 deletions(-)
 create mode 100644 clang/test/Modules/GH109879-1.cpp
 create mode 100644 clang/test/Modules/GH109879-2.cpp

diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index e511614fcf245..5923888383022 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -43,7 +43,7 @@ code bases.
   still supporting SPARC V8 CPUs need to specify ``-mcpu=v8`` with a
   `config file
   `_.
-
+
 - The ``clang-rename`` tool has been removed.

 C/C++ Language Potentially Breaking Changes
@@ -115,7 +115,7 @@ C++ Language Changes
 - Allow single element access of GCC vector/ext_vector_type object to be
   constant expression. Supports the `V.xyzw` syntax and other tidbits as
   seen in OpenCL. Selecting multiple elements is left as a future work.
-- Implement `CWG1815 `_. Support lifetime extension
+- Implement `CWG1815 `_. Support lifetime extension
   of temporary created by aggregate initialization using a default member
   initializer.
@@ -452,6 +452,9 @@ Miscellaneous Clang Crashes Fixed
 - Fixed ``-ast-dump`` crashes on codes involving ``concept`` with
   ``-ast-dump-decl-types``. (#GH94928)

+- Fixed internal assertion firing when a declaration in the implicit global
+  module is found through ADL. (GH#109879)
+
 OpenACC Specific Changes
 ------------------------
diff --git a/clang/lib/Sema/SemaLookup.cpp b/clang/lib/Sema/SemaLookup.cpp
index ed5d44aa898f4..f3f62474d0644 100644
--- a/clang/lib/Sema/SemaLookup.cpp
+++ b/clang/lib/Sema/SemaLookup.cpp
@@ -3850,8 +3850,9 @@ void Sema::ArgumentDependentLookup(DeclarationName Name, SourceLocation Loc,
       // exports are only valid in module purview and outside of any
       // PMF (although a PMF should not even be present in a module
       // with an import).
-      assert(FM && FM->isNamedModule() && !FM->isPrivateModule() &&
-             "bad export context");
+      assert(FM &&
+             (FM->isNamedModule() || FM->isImplicitGlobalModule()) &&
+             !FM->isPrivateModule() && "bad export context");
       // .. are attached to a named module M, do not appear in the
       // translation unit containing the point of the lookup..
      if (D->isInAnotherModuleUnit() &&
diff --git a/clang/test/Modules/GH109879-1.cpp b/clang/test/Modules/GH109879-1.cpp
new file mode 100644
index 0000000000000..72cfb11081e48
--- /dev/null
+++ b/clang/test/Modules/GH109879-1.cpp
@@ -0,0 +1,25 @@
+// RUN: rm -rf %t
+// RUN: mkdir -p %t
+// RUN: split-file %s %t
+//
+// RUN: %clang_cc1 -std=c++20 %t/A.cppm -emit-module-interface -o %t/A.pcm
+// RUN: %clang_cc1 -std=c++20 %t/B.cppm -fprebuilt-module-path=%t -emit-module-interface -o %t/B.pcm
+// RUN: %clang_cc1 -fsyntax-only -std=c++20 -fprebuilt-module-path=%t -verify %t/C.cpp
+
+//--- A.cppm
+export module A;
+export extern "C" void foo(struct Bar);
+
+//--- B.cppm
+module;
+import A;
+export module B;
+
+//--- C.cpp
+import B;
+struct Bar {};
+void test() {
+  foo(Bar());
+  // expected-error@-1 {{declaration of 'foo' must be imported}}
+  // expected-note@A.cppm:2 {{declaration here is not visible}}
+}
diff --git a/clang/test/Modules/GH109879-2.cpp b/clang/test/Modules/GH109879-2.cpp
new file mode 100644
index 0000000000000..ccec57839898a
--- /dev/null
+++ b/clang/test/Modules/GH109879-2.cpp
@@ -0,0 +1,29 @@
+// RUN: rm -rf %t
+// RUN: mkdir -p %t
+// RUN: split-file %s %t
+//
+// RUN: %clang_cc1 -std=c++20 %t/A.cppm -emit-module-interface -o %t/A.pcm
+// RUN: %clang_cc1 -std=c++20 %t/B.cppm -fprebuilt-module-path=%t -emit-module-interface -o %t/B.pcm
+// RUN: %clang_cc1 -fsyntax-only -std=c++20 -fprebuilt-module-path=%t -verify %t/C.cpp
+
+//--- foo.h
+struct Bar {};
+extern "C" void foo(struct Bar);
+
+//--- A.cppm
+module;
+#include "foo.h"
+export module A;
+export extern "C" using ::foo;
+//--- B.cppm
+module;
+import A;
+export module B;
+
+//--- C.cpp
+// expected-no-diagnostics
+import B;
+#include "foo.h"
+void test() {
+  foo(Bar());
+}

From 7a086e1b2dc05f54afae3591614feede727601fa Mon Sep 17 00:00:00 2001
From: yabinc
Date: Tue, 24 Sep 2024 19:06:20 -0700
Subject: [PATCH 077/212] [clang][CodeGen] Zero init unspecified fields in initializers in C (#97121)

When an initializer is provided to a variable, the Linux kernel has
relied on the compiler to zero-initialize unspecified fields, as
clarified in https://www.spinics.net/lists/netdev/msg1007244.html.

But clang doesn't guarantee this:

1. For a union type, if an empty initializer is given, clang only
initializes bytes for the first field; the remaining bytes of other
(larger) fields are marked as undef. Accessing those undef bytes can
lead to undefined behavior.

2. For a union type, if an initializer explicitly sets a field, the
remaining bytes of other (larger) fields are marked as undef.

3. When an initializer is given, clang doesn't zero-initialize padding.

So this patch makes the following changes:

1. In C, when an initializer is provided for a variable,
zero-initialize undef and padding fields in the initializer.

2. Document the change in LanguageExtensions.rst.

As suggested in
https://github.com/llvm/llvm-project/issues/78034#issuecomment-2183437928,
the change isn't required by C23, but it's standards-conforming to do
so.
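For example, a minimal sketch of the guaranteed behavior (the names here
are illustrative, not taken from the patch's tests):

    union U { int a; long long b; };
    union U u = {1};     /* bytes 0-3 hold 1; bytes 4-7 were undef, now zero */

    struct S { char c; int i; };
    struct S s = {'x'};  /* the 3 padding bytes after c were undef, now zero */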
Fixes: https://github.com/llvm/llvm-project/issues/97459 --- clang/docs/LanguageExtensions.rst | 23 ++ clang/lib/CodeGen/CGExprAgg.cpp | 40 ++- clang/lib/CodeGen/CGExprConstant.cpp | 93 +++++- clang/lib/CodeGen/CodeGenModule.h | 51 ++++ ...07-22-bitfield-init-after-zero-len-array.c | 2 +- clang/test/CodeGen/2008-08-07-AlignPadding1.c | 4 +- .../CodeGen/2009-06-14-anonymous-union-init.c | 4 +- clang/test/CodeGen/64bit-swiftcall.c | 12 +- clang/test/CodeGen/arm-swiftcall.c | 4 +- clang/test/CodeGen/const-init.c | 4 +- clang/test/CodeGen/decl.c | 4 +- clang/test/CodeGen/designated-initializers.c | 12 +- clang/test/CodeGen/ext-int.c | 18 +- clang/test/CodeGen/flexible-array-init.c | 24 +- clang/test/CodeGen/global-init.c | 2 +- clang/test/CodeGen/init.c | 19 -- .../linux-kernel-struct-union-initializer.c | 267 ++++++++++++++++++ .../linux-kernel-struct-union-initializer2.c | 140 +++++++++ clang/test/CodeGen/mingw-long-double.c | 9 +- clang/test/CodeGen/mms-bitfields.c | 4 +- clang/test/CodeGen/union-init2.c | 4 +- clang/test/CodeGen/windows-swiftcall.c | 12 +- .../CodeGenObjC/designated-initializers.m | 2 +- 23 files changed, 658 insertions(+), 96 deletions(-) create mode 100644 clang/test/CodeGen/linux-kernel-struct-union-initializer.c create mode 100644 clang/test/CodeGen/linux-kernel-struct-union-initializer2.c diff --git a/clang/docs/LanguageExtensions.rst b/clang/docs/LanguageExtensions.rst index 0c6b9b1b8f9ce..f4be97047422f 100644 --- a/clang/docs/LanguageExtensions.rst +++ b/clang/docs/LanguageExtensions.rst @@ -5860,3 +5860,26 @@ specify the starting offset to begin embedding from. The resources is treated as being empty if the specified offset is larger than the number of bytes in the resource. The offset will be applied *before* any ``limit`` parameters are applied. + +Union and aggregate initialization in C +======================================= + +In C23 (N2900), when an object is initialized from initializer ``= {}``, all +elements of arrays, all members of structs, and the first members of unions are +empty-initialized recursively. In addition, all padding bits are initialized to +zero. + +Clang guarantees the following behaviors: + +* ``1:`` Clang supports initializer ``= {}`` mentioned above in all C + standards. + +* ``2:`` When unions are initialized from initializer ``= {}``, bytes outside + of the first members of unions are also initialized to zero. + +* ``3:`` When unions, structures and arrays are initialized from initializer + ``= { initializer-list }``, all members not explicitly initialized in + the initializer list are empty-initialized recursively. In addition, all + padding bits are initialized to zero. + +Currently, the above extension only applies to C source code, not C++. diff --git a/clang/lib/CodeGen/CGExprAgg.cpp b/clang/lib/CodeGen/CGExprAgg.cpp index bbfc6672ecc25..43f3bcc95fe76 100644 --- a/clang/lib/CodeGen/CGExprAgg.cpp +++ b/clang/lib/CodeGen/CGExprAgg.cpp @@ -1698,6 +1698,17 @@ void AggExprEmitter::VisitCXXParenListOrInitListExpr( // Prepare a 'this' for CXXDefaultInitExprs. 
CodeGenFunction::FieldConstructionScope FCS(CGF, Dest.getAddress()); + const bool ZeroInitPadding = + CGF.CGM.shouldZeroInitPadding() && !Dest.isZeroed(); + const Address BaseLoc = Dest.getAddress().withElementType(CGF.Int8Ty); + auto DoZeroInitPadding = [&](CharUnits Offset, CharUnits Size) { + if (Size.isPositive()) { + Address Loc = CGF.Builder.CreateConstGEP(BaseLoc, Offset.getQuantity()); + llvm::Constant *SizeVal = CGF.Builder.getInt64(Size.getQuantity()); + CGF.Builder.CreateMemSet(Loc, CGF.Builder.getInt8(0), SizeVal, false); + } + }; + if (record->isUnion()) { // Only initialize one field of a union. The field itself is // specified by the initializer list. @@ -1722,17 +1733,37 @@ void AggExprEmitter::VisitCXXParenListOrInitListExpr( if (NumInitElements) { // Store the initializer into the field EmitInitializationToLValue(InitExprs[0], FieldLoc); + if (ZeroInitPadding) { + CharUnits TotalSize = + Dest.getPreferredSize(CGF.getContext(), DestLV.getType()); + CharUnits FieldSize = + CGF.getContext().getTypeSizeInChars(FieldLoc.getType()); + DoZeroInitPadding(FieldSize, TotalSize - FieldSize); + } } else { // Default-initialize to null. - EmitNullInitializationToLValue(FieldLoc); + if (ZeroInitPadding) + EmitNullInitializationToLValue(DestLV); + else + EmitNullInitializationToLValue(FieldLoc); } - return; } // Here we iterate over the fields; this makes it simpler to both // default-initialize fields and skip over unnamed fields. + const ASTRecordLayout &Layout = CGF.getContext().getASTRecordLayout(record); + CharUnits SizeSoFar = CharUnits::Zero(); for (const auto *field : record->fields()) { + if (ZeroInitPadding) { + unsigned FieldNo = field->getFieldIndex(); + CharUnits Offset = + CGF.getContext().toCharUnitsFromBits(Layout.getFieldOffset(FieldNo)); + DoZeroInitPadding(SizeSoFar, Offset - SizeSoFar); + CharUnits FieldSize = + CGF.getContext().getTypeSizeInChars(field->getType()); + SizeSoFar = Offset + FieldSize; + } // We're done once we hit the flexible array member. 
if (field->getType()->isIncompleteArrayType()) break; @@ -1774,6 +1805,11 @@ void AggExprEmitter::VisitCXXParenListOrInitListExpr( } } } + if (ZeroInitPadding) { + CharUnits TotalSize = + Dest.getPreferredSize(CGF.getContext(), DestLV.getType()); + DoZeroInitPadding(SizeSoFar, TotalSize - SizeSoFar); + } } void AggExprEmitter::VisitArrayInitLoopExpr(const ArrayInitLoopExpr *E, diff --git a/clang/lib/CodeGen/CGExprConstant.cpp b/clang/lib/CodeGen/CGExprConstant.cpp index dd65080a84044..66bc0640b632a 100644 --- a/clang/lib/CodeGen/CGExprConstant.cpp +++ b/clang/lib/CodeGen/CGExprConstant.cpp @@ -42,6 +42,16 @@ using namespace CodeGen; namespace { class ConstExprEmitter; +llvm::Constant *getPadding(const CodeGenModule &CGM, CharUnits PadSize) { + llvm::Type *Ty = CGM.CharTy; + if (PadSize > CharUnits::One()) + Ty = llvm::ArrayType::get(Ty, PadSize.getQuantity()); + if (CGM.shouldZeroInitPadding()) { + return llvm::Constant::getNullValue(Ty); + } + return llvm::UndefValue::get(Ty); +} + struct ConstantAggregateBuilderUtils { CodeGenModule &CGM; @@ -61,10 +71,7 @@ struct ConstantAggregateBuilderUtils { } llvm::Constant *getPadding(CharUnits PadSize) const { - llvm::Type *Ty = CGM.CharTy; - if (PadSize > CharUnits::One()) - Ty = llvm::ArrayType::get(Ty, PadSize.getQuantity()); - return llvm::UndefValue::get(Ty); + return ::getPadding(CGM, PadSize); } llvm::Constant *getZeroes(CharUnits ZeroSize) const { @@ -591,6 +598,11 @@ class ConstStructBuilder { bool Build(const InitListExpr *ILE, bool AllowOverwrite); bool Build(const APValue &Val, const RecordDecl *RD, bool IsPrimaryBase, const CXXRecordDecl *VTableClass, CharUnits BaseOffset); + bool DoZeroInitPadding(const ASTRecordLayout &Layout, unsigned FieldNo, + const FieldDecl &Field, bool AllowOverwrite, + CharUnits &FieldSize, CharUnits &SizeSoFar); + bool DoZeroInitPadding(const ASTRecordLayout &Layout, bool AllowOverwrite, + CharUnits &SizeSoFar); llvm::Constant *Finalize(QualType Ty); }; @@ -715,6 +727,10 @@ bool ConstStructBuilder::Build(const InitListExpr *ILE, bool AllowOverwrite) { if (CXXRD->getNumBases()) return false; + const bool ZeroInitPadding = CGM.shouldZeroInitPadding(); + CharUnits FieldSize = CharUnits::Zero(); + CharUnits SizeSoFar = CharUnits::Zero(); + for (FieldDecl *Field : RD->fields()) { ++FieldNo; @@ -732,8 +748,13 @@ bool ConstStructBuilder::Build(const InitListExpr *ILE, bool AllowOverwrite) { const Expr *Init = nullptr; if (ElementNo < ILE->getNumInits()) Init = ILE->getInit(ElementNo++); - if (isa_and_nonnull(Init)) + if (isa_and_nonnull(Init)) { + if (ZeroInitPadding && + !DoZeroInitPadding(Layout, FieldNo, *Field, AllowOverwrite, FieldSize, + SizeSoFar)) + return false; continue; + } // Zero-sized fields are not emitted, but their initializers may still // prevent emission of this struct as a constant. @@ -743,6 +764,11 @@ bool ConstStructBuilder::Build(const InitListExpr *ILE, bool AllowOverwrite) { continue; } + if (ZeroInitPadding && + !DoZeroInitPadding(Layout, FieldNo, *Field, AllowOverwrite, FieldSize, + SizeSoFar)) + return false; + // When emitting a DesignatedInitUpdateExpr, a nested InitListExpr // represents additional overwriting of our current constant value, and not // a new constant to emit independently. 
@@ -768,6 +794,10 @@ bool ConstStructBuilder::Build(const InitListExpr *ILE, bool AllowOverwrite) { if (!EltInit) return false; + if (ZeroInitPadding && FieldSize.isZero()) + SizeSoFar += CharUnits::fromQuantity( + CGM.getDataLayout().getTypeAllocSize(EltInit->getType())); + if (!Field->isBitField()) { // Handle non-bitfield members. if (!AppendField(Field, Layout.getFieldOffset(FieldNo), EltInit, @@ -785,6 +815,9 @@ bool ConstStructBuilder::Build(const InitListExpr *ILE, bool AllowOverwrite) { } } + if (ZeroInitPadding && !DoZeroInitPadding(Layout, AllowOverwrite, SizeSoFar)) + return false; + return true; } @@ -849,6 +882,9 @@ bool ConstStructBuilder::Build(const APValue &Val, const RecordDecl *RD, unsigned FieldNo = 0; uint64_t OffsetBits = CGM.getContext().toBits(Offset); + const bool ZeroInitPadding = CGM.shouldZeroInitPadding(); + CharUnits FieldSize = CharUnits::Zero(); + CharUnits SizeSoFar = CharUnits::Zero(); bool AllowOverwrite = false; for (RecordDecl::field_iterator Field = RD->field_begin(), @@ -870,6 +906,15 @@ bool ConstStructBuilder::Build(const APValue &Val, const RecordDecl *RD, if (!EltInit) return false; + if (ZeroInitPadding) { + if (!DoZeroInitPadding(Layout, FieldNo, **Field, AllowOverwrite, + FieldSize, SizeSoFar)) + return false; + if (FieldSize.isZero()) + SizeSoFar += CharUnits::fromQuantity( + CGM.getDataLayout().getTypeAllocSize(EltInit->getType())); + } + if (!Field->isBitField()) { // Handle non-bitfield members. if (!AppendField(*Field, Layout.getFieldOffset(FieldNo) + OffsetBits, @@ -886,7 +931,35 @@ bool ConstStructBuilder::Build(const APValue &Val, const RecordDecl *RD, return false; } } + if (ZeroInitPadding && !DoZeroInitPadding(Layout, AllowOverwrite, SizeSoFar)) + return false; + + return true; +} +bool ConstStructBuilder::DoZeroInitPadding( + const ASTRecordLayout &Layout, unsigned FieldNo, const FieldDecl &Field, + bool AllowOverwrite, CharUnits &FieldSize, CharUnits &SizeSoFar) { + CharUnits Offset = + CGM.getContext().toCharUnitsFromBits(Layout.getFieldOffset(FieldNo)); + if (SizeSoFar < Offset) + if (!AppendBytes(SizeSoFar, getPadding(CGM, Offset - SizeSoFar), + AllowOverwrite)) + return false; + FieldSize = CGM.getContext().getTypeSizeInChars(Field.getType()); + SizeSoFar = Offset + FieldSize; + return true; +} + +bool ConstStructBuilder::DoZeroInitPadding(const ASTRecordLayout &Layout, + bool AllowOverwrite, + CharUnits &SizeSoFar) { + CharUnits TotalSize = Layout.getSize(); + if (SizeSoFar < TotalSize) + if (!AppendBytes(SizeSoFar, getPadding(CGM, TotalSize - SizeSoFar), + AllowOverwrite)) + return false; + SizeSoFar = TotalSize; return true; } @@ -1127,12 +1200,10 @@ class ConstExprEmitter assert(CurSize <= TotalSize && "Union size mismatch!"); if (unsigned NumPadBytes = TotalSize - CurSize) { - llvm::Type *Ty = CGM.CharTy; - if (NumPadBytes > 1) - Ty = llvm::ArrayType::get(Ty, NumPadBytes); - - Elts.push_back(llvm::UndefValue::get(Ty)); - Types.push_back(Ty); + llvm::Constant *Padding = + getPadding(CGM, CharUnits::fromQuantity(NumPadBytes)); + Elts.push_back(Padding); + Types.push_back(Padding->getType()); } llvm::StructType *STy = llvm::StructType::get(VMContext, Types, false); diff --git a/clang/lib/CodeGen/CodeGenModule.h b/clang/lib/CodeGen/CodeGenModule.h index c58bb88035ca8..fcdfef03088da 100644 --- a/clang/lib/CodeGen/CodeGenModule.h +++ b/clang/lib/CodeGen/CodeGenModule.h @@ -1676,6 +1676,57 @@ class CodeGenModule : public CodeGenTypeCache { MustTailCallUndefinedGlobals.insert(Global); } + bool shouldZeroInitPadding() const { + // 
In C23 (N3096) $6.7.10:
+  // """
+  // If any object is initialized with an empty initializer, then it is
+  // subject to default initialization:
+  //  - if it is an aggregate, every member is initialized (recursively)
+  //  according to these rules, and any padding is initialized to zero bits;
+  //  - if it is a union, the first named member is initialized (recursively)
+  //  according to these rules, and any padding is initialized to zero bits.
+  //
+  // If the aggregate or union contains elements or members that are
+  // aggregates or unions, these rules apply recursively to the subaggregates
+  // or contained unions.
+  //
+  // If there are fewer initializers in a brace-enclosed list than there are
+  // elements or members of an aggregate, or fewer characters in a string
+  // literal used to initialize an array of known size than there are elements
+  // in the array, the remainder of the aggregate is subject to default
+  // initialization.
+  // """
+  //
+  // From my understanding, the standard is ambiguous in the following two
+  // areas:
+  // 1. For a union type with an empty initializer, if the first named member
+  // is not the largest member, then the bytes that come after the first named
+  // member but before the padding are left unspecified. An example is:
+  //    union U { int a; long long b; };
+  //    union U u = {}; // The first 4 bytes are 0, but bytes 4-7 are left
+  //                    // unspecified.
+  //
+  // 2. It only mentions padding for an empty initializer, but doesn't mention
+  // padding for a non-empty initializer list. And if the aggregate or union
+  // contains elements or members that are aggregates or unions, and some have
+  // non-empty initializers while others have empty initializers, the padding
+  // initialization is unclear. An example is:
+  //    struct S1 { int a; long long b; };
+  //    struct S2 { char c; struct S1 s1; };
+  //    // The values of the padding between s2.c and s2.s1.a, and between
+  //    // s2.s1.a and s2.s1.b, are unclear.
+  //    struct S2 s2 = { 'c' };
+  //
+  // Here we choose to zero-initialize the remaining bytes of a union type,
+  // because projects like the Linux kernel rely on this behavior. If we don't
+  // explicitly zero-initialize them, the undef values can be optimized to
+  // return garbage data. We also choose to zero-initialize the padding of
+  // aggregates and unions, whether they are initialized with empty or
+  // non-empty initializers. This provides a consistent behavior that projects
+  // like the Linux kernel can rely on.
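+  //
+  // For illustration (u2 is a hypothetical example, not code from this
+  // patch), under this policy:
+  //    union U u2 = {1}; // bytes 0-3 hold 1; bytes 4-7 are zero, not undef.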
+ return !getLangOpts().CPlusPlus; + } + private: bool shouldDropDLLAttribute(const Decl *D, const llvm::GlobalValue *GV) const; diff --git a/clang/test/CodeGen/2008-07-22-bitfield-init-after-zero-len-array.c b/clang/test/CodeGen/2008-07-22-bitfield-init-after-zero-len-array.c index b72d689659e60..b639734ef5d4b 100644 --- a/clang/test/CodeGen/2008-07-22-bitfield-init-after-zero-len-array.c +++ b/clang/test/CodeGen/2008-07-22-bitfield-init-after-zero-len-array.c @@ -8,4 +8,4 @@ struct et7 { 52, }; -// CHECK: @yv7 ={{.*}} global %struct.et7 { [0 x float] zeroinitializer, i8 52 } +// CHECK: @yv7 ={{.*}} global { [0 x float], i8, [3 x i8] } { [0 x float] zeroinitializer, i8 52, [3 x i8] zeroinitializer } diff --git a/clang/test/CodeGen/2008-08-07-AlignPadding1.c b/clang/test/CodeGen/2008-08-07-AlignPadding1.c index 17e88ce02659f..d69cbc22cc1df 100644 --- a/clang/test/CodeGen/2008-08-07-AlignPadding1.c +++ b/clang/test/CodeGen/2008-08-07-AlignPadding1.c @@ -20,9 +20,9 @@ struct gc_generation { #define GEN_HEAD(n) (&generations[n].head) -// The idea is that there are 6 undefs in this structure initializer to cover +// The idea is that there are 6 zeroinitializers in this structure initializer to cover // the padding between elements. -// CHECK: @generations ={{.*}} global [3 x %struct.gc_generation] [%struct.gc_generation { %union._gc_head { %struct.anon { ptr @generations, ptr @generations, i64 0 }, [8 x i8] undef }, i32 700, i32 0, [8 x i8] undef }, %struct.gc_generation { %union._gc_head { %struct.anon { ptr getelementptr (i8, ptr @generations, i64 48), ptr getelementptr (i8, ptr @generations, i64 48), i64 0 }, [8 x i8] undef }, i32 10, i32 0, [8 x i8] undef }, %struct.gc_generation { %union._gc_head { %struct.anon { ptr getelementptr (i8, ptr @generations, i64 96), ptr getelementptr (i8, ptr @generations, i64 96), i64 0 }, [8 x i8] undef }, i32 10, i32 0, [8 x i8] undef }] +// CHECK: @generations ={{.*}} global [3 x %struct.gc_generation] [%struct.gc_generation { %union._gc_head { %struct.anon { ptr @generations, ptr @generations, i64 0 }, [8 x i8] zeroinitializer }, i32 700, i32 0, [8 x i8] zeroinitializer }, %struct.gc_generation { %union._gc_head { %struct.anon { ptr getelementptr (i8, ptr @generations, i64 48), ptr getelementptr (i8, ptr @generations, i64 48), i64 0 }, [8 x i8] zeroinitializer }, i32 10, i32 0, [8 x i8] zeroinitializer }, %struct.gc_generation { %union._gc_head { %struct.anon { ptr getelementptr (i8, ptr @generations, i64 96), ptr getelementptr (i8, ptr @generations, i64 96), i64 0 }, [8 x i8] zeroinitializer }, i32 10, i32 0, [8 x i8] zeroinitializer }] /* linked lists of container objects */ struct gc_generation generations[3] = { /* PyGC_Head, threshold, count */ diff --git a/clang/test/CodeGen/2009-06-14-anonymous-union-init.c b/clang/test/CodeGen/2009-06-14-anonymous-union-init.c index 13f6357f7966d..a4375d7868f01 100644 --- a/clang/test/CodeGen/2009-06-14-anonymous-union-init.c +++ b/clang/test/CodeGen/2009-06-14-anonymous-union-init.c @@ -7,7 +7,7 @@ struct sysfs_dirent { }; struct sysfs_dirent sysfs_root = { {}, 16877 }; -// CHECK: @sysfs_root = {{.*}}global %struct.sysfs_dirent { %union.anon zeroinitializer, i16 16877 } +// CHECK: @sysfs_root = {{.*}}global { %union.anon, i16, [2 x i8] } { %union.anon zeroinitializer, i16 16877, [2 x i8] zeroinitializer } struct Foo { union { struct empty {} x; }; @@ -16,4 +16,4 @@ struct Foo { struct Foo foo = { {}, 16877 }; // EMPTY: @foo = {{.*}}global %struct.Foo { i16 16877 } -// EMPTY-MSVC: @foo = {{.*}}global %struct.Foo 
{ [4 x i8] undef, i16 16877 } +// EMPTY-MSVC: @foo = {{.*}}global %struct.Foo { [4 x i8] zeroinitializer, i16 16877 } diff --git a/clang/test/CodeGen/64bit-swiftcall.c b/clang/test/CodeGen/64bit-swiftcall.c index 7af65ccf556a8..7f8aa02d97ce1 100644 --- a/clang/test/CodeGen/64bit-swiftcall.c +++ b/clang/test/CodeGen/64bit-swiftcall.c @@ -14,8 +14,6 @@ // CHECK-DAG: %struct.atomic_padded = type { { %struct.packed, [7 x i8] } } // CHECK-DAG: %struct.packed = type <{ i64, i8 }> -// -// CHECK: [[STRUCT2_RESULT:@.*]] = private {{.*}} constant [[STRUCT2_TYPE:%.*]] { i32 0, i8 0, i8 undef, i8 0, i32 0, i32 0 } /*****************************************************************************/ /****************************** PARAMETER ABIS *******************************/ @@ -162,8 +160,8 @@ typedef struct { } struct_2; TEST(struct_2); // CHECK-LABEL: define{{.*}} swiftcc { i64, i64 } @return_struct_2() {{.*}}{ -// CHECK: [[RET:%.*]] = alloca [[STRUCT2_TYPE]], align 4 -// CHECK: call void @llvm.memcpy{{.*}}({{.*}}[[RET]], {{.*}}[[STRUCT2_RESULT]] +// CHECK: [[RET:%.*]] = alloca [[STRUCT2:%.*]], align 4 +// CHECK: call void @llvm.memset // CHECK: [[GEP0:%.*]] = getelementptr inbounds nuw { i64, i64 }, ptr [[RET]], i32 0, i32 0 // CHECK: [[T0:%.*]] = load i64, ptr [[GEP0]], align 4 // CHECK: [[GEP1:%.*]] = getelementptr inbounds nuw { i64, i64 }, ptr [[RET]], i32 0, i32 1 @@ -173,7 +171,7 @@ TEST(struct_2); // CHECK: ret { i64, i64 } [[R1]] // CHECK: } // CHECK-LABEL: define{{.*}} swiftcc void @take_struct_2(i64 %0, i64 %1) {{.*}}{ -// CHECK: [[V:%.*]] = alloca [[STRUCT:%.*]], align 4 +// CHECK: [[V:%.*]] = alloca [[STRUCT2]], align 4 // CHECK: [[GEP0:%.*]] = getelementptr inbounds nuw { i64, i64 }, ptr [[V]], i32 0, i32 0 // CHECK: store i64 %0, ptr [[GEP0]], align 4 // CHECK: [[GEP1:%.*]] = getelementptr inbounds nuw { i64, i64 }, ptr [[V]], i32 0, i32 1 @@ -181,7 +179,7 @@ TEST(struct_2); // CHECK: ret void // CHECK: } // CHECK-LABEL: define{{.*}} void @test_struct_2() {{.*}} { -// CHECK: [[TMP:%.*]] = alloca [[STRUCT2_TYPE]], align 4 +// CHECK: [[TMP:%.*]] = alloca [[STRUCT2]], align 4 // CHECK: [[CALL:%.*]] = call swiftcc { i64, i64 } @return_struct_2() // CHECK: [[GEP:%.*]] = getelementptr inbounds nuw {{.*}} [[TMP]], i32 0, i32 0 // CHECK: [[T0:%.*]] = extractvalue { i64, i64 } [[CALL]], 0 @@ -254,7 +252,7 @@ typedef union { TEST(union_het_fp) // CHECK-LABEL: define{{.*}} swiftcc i64 @return_union_het_fp() // CHECK: [[RET:%.*]] = alloca [[UNION:%.*]], align 8 -// CHECK: call void @llvm.memcpy{{.*}}(ptr align 8 [[RET]] +// CHECK: call void @llvm.memset{{.*}}(ptr align 8 [[RET]] // CHECK: [[GEP:%.*]] = getelementptr inbounds nuw { i64 }, ptr [[RET]], i32 0, i32 0 // CHECK: [[R0:%.*]] = load i64, ptr [[GEP]], align 8 // CHECK: ret i64 [[R0]] diff --git a/clang/test/CodeGen/arm-swiftcall.c b/clang/test/CodeGen/arm-swiftcall.c index ec0e3867909a8..677b878c6765d 100644 --- a/clang/test/CodeGen/arm-swiftcall.c +++ b/clang/test/CodeGen/arm-swiftcall.c @@ -172,7 +172,7 @@ typedef struct { TEST(struct_2); // CHECK-LABEL: define{{.*}} @return_struct_2() // CHECK: [[RET:%.*]] = alloca [[REC:%.*]], align 4 -// CHECK: @llvm.memcpy +// CHECK: @llvm.memset // CHECK: [[T0:%.*]] = getelementptr inbounds nuw [[AGG:{ i32, i32, float, float }]], ptr [[RET]], i32 0, i32 0 // CHECK: [[FIRST:%.*]] = load i32, ptr [[T0]], align 4 // CHECK: [[T0:%.*]] = getelementptr inbounds nuw [[AGG]], ptr [[RET]], i32 0, i32 1 @@ -274,7 +274,7 @@ typedef union { TEST(union_het_fp) // CHECK-LABEL: define{{.*}} @return_union_het_fp() // 
CHECK: [[RET:%.*]] = alloca [[REC:%.*]], align {{(4|8)}} -// CHECK: @llvm.memcpy +// CHECK: @llvm.memset // CHECK: [[T0:%.*]] = getelementptr inbounds nuw [[AGG:{ i32, i32 }]], ptr [[RET]], i32 0, i32 0 // CHECK: [[FIRST:%.*]] = load i32, ptr [[T0]], align {{(4|8)}} // CHECK: [[T0:%.*]] = getelementptr inbounds nuw [[AGG]], ptr [[RET]], i32 0, i32 1 diff --git a/clang/test/CodeGen/const-init.c b/clang/test/CodeGen/const-init.c index ad3e9551199ac..fc973cb983a80 100644 --- a/clang/test/CodeGen/const-init.c +++ b/clang/test/CodeGen/const-init.c @@ -170,7 +170,7 @@ void g30(void) { int : 1; int x; } a = {}; - // CHECK: @g30.a = internal global %struct.anon.1 <{ i8 undef, i32 0 }>, align 1 + // CHECK: @g30.a = internal global %struct.anon.1 zeroinitializer, align 1 #pragma pack() } @@ -182,7 +182,7 @@ void g31(void) { short z; } a = {23122, -12312731, -312}; #pragma pack() - // CHECK: @g31.a = internal global %struct.anon.2 { i16 23122, i32 -12312731, i16 -312 }, align 4 + // CHECK: @g31.a = internal global { i16, [2 x i8], i32, i16, [2 x i8] } { i16 23122, [2 x i8] zeroinitializer, i32 -12312731, i16 -312, [2 x i8] zeroinitializer }, align 4 } // Clang should evaluate this in constant context, so floating point mode should diff --git a/clang/test/CodeGen/decl.c b/clang/test/CodeGen/decl.c index a63846b3223da..97446781fdbd2 100644 --- a/clang/test/CodeGen/decl.c +++ b/clang/test/CodeGen/decl.c @@ -2,10 +2,10 @@ // CHECK: @test1.x = internal constant [12 x i32] [i32 1 // CHECK: @__const.test2.x = private unnamed_addr constant [13 x i32] [i32 1, -// CHECK: @test5w = {{(dso_local )?}}global { i32, [4 x i8] } { i32 2, [4 x i8] undef } +// CHECK: @test5w = {{(dso_local )?}}global { i32, [4 x i8] } { i32 2, [4 x i8] zeroinitializer } // CHECK: @test5y = {{(dso_local )?}}global { double } { double 7.300000e+0{{[0]*}}1 } -// CHECK: @__const.test6.x = private unnamed_addr constant %struct.SelectDest { i8 1, i8 2, i32 3, i32 0 } +// CHECK: @__const.test6.x = private unnamed_addr constant { i8, i8, [2 x i8], i32, i32 } { i8 1, i8 2, [2 x i8] zeroinitializer, i32 3, i32 0 } // CHECK: @test7 = {{(dso_local )?}}global [2 x %struct.test7s] [%struct.test7s { i32 1, i32 2 }, %struct.test7s { i32 4, i32 0 }] diff --git a/clang/test/CodeGen/designated-initializers.c b/clang/test/CodeGen/designated-initializers.c index 620b1b90d2575..ac7860db43be7 100644 --- a/clang/test/CodeGen/designated-initializers.c +++ b/clang/test/CodeGen/designated-initializers.c @@ -8,7 +8,7 @@ struct foo { // CHECK: @u ={{.*}} global %union.anon zeroinitializer union { int i; float f; } u = { }; -// CHECK: @u2 ={{.*}} global { i32, [4 x i8] } { i32 0, [4 x i8] undef } +// CHECK: @u2 ={{.*}} global { i32, [4 x i8] } zeroinitializer union { int i; double f; } u2 = { }; // CHECK: @u3 ={{.*}} global %union.anon.1 zeroinitializer @@ -62,22 +62,22 @@ struct overwrite_string_struct2 { char L[6]; int M; } overwrite_string2[] = { { { "foo" }, 1 }, [0].L[2] = 'x'}; -// CHECK: [6 x i8] c"fox\00\00\00", i32 1 +// CHECK: [6 x i8] c"fox\00\00\00", [2 x i8] zeroinitializer, i32 1 struct overwrite_string_struct3 { char L[3]; int M; } overwrite_string3[] = { { { "foo" }, 1 }, [0].L[2] = 'x'}; -// CHECK: [3 x i8] c"fox", i32 1 +// CHECK: [3 x i8] c"fox", i8 0, i32 1 struct overwrite_string_struct4 { char L[3]; int M; } overwrite_string4[] = { { { "foobar" }, 1 }, [0].L[2] = 'x'}; -// CHECK: [3 x i8] c"fox", i32 1 +// CHECK: [3 x i8] c"fox", i8 0, i32 1 struct overwrite_string_struct5 { char L[6]; int M; } overwrite_string5[] = { { { "foo" }, 1 }, 
[0].L[4] = 'y'}; -// CHECK: [6 x i8] c"foo\00y\00", i32 1 +// CHECK: [6 x i8] c"foo\00y\00", [2 x i8] zeroinitializer, i32 1 // CHECK: @u1 = {{.*}} { i32 65535 } @@ -138,7 +138,7 @@ union_16644_t union_16644_instance_4[2] = [1].b[1] = 4 }; -// CHECK: @lab ={{.*}} global { [4 x i8], i32 } { [4 x i8] undef, i32 123 } +// CHECK: @lab ={{.*}} global { [4 x i8], i32 } { [4 x i8] zeroinitializer, i32 123 } struct leading_anon_bitfield { int : 32; int n; } lab = { .n = 123 }; struct Base { diff --git a/clang/test/CodeGen/ext-int.c b/clang/test/CodeGen/ext-int.c index e3d609a4ba4a2..aebacd6f22ffc 100644 --- a/clang/test/CodeGen/ext-int.c +++ b/clang/test/CodeGen/ext-int.c @@ -16,7 +16,7 @@ unsigned _BitInt(1) GlobSize1 = 0; // CHECK: @GlobSize1 = {{.*}}global i8 0 -// CHECK64: @__const.foo.A = private unnamed_addr constant { i32, [4 x i8], <{ i8, [23 x i8] }> } { i32 1, [4 x i8] undef, <{ i8, [23 x i8] }> <{ i8 -86, [23 x i8] zeroinitializer }> }, align 8 +// CHECK64: @__const.foo.A = private unnamed_addr constant { i32, [4 x i8], <{ i8, [23 x i8] }> } { i32 1, [4 x i8] zeroinitializer, <{ i8, [23 x i8] }> <{ i8 -86, [23 x i8] zeroinitializer }> }, align 8 // @BigGlob = global [40 x i8] c"\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF", align 8 // CHECK64: @f.p = internal global <{ i8, i8, [22 x i8] }> <{ i8 16, i8 39, [22 x i8] zeroinitializer }>, align 8 @@ -91,8 +91,8 @@ int foo(int a) { // CHECK64: %B2 = getelementptr inbounds nuw %struct.S1, ptr %B, i32 0, i32 2 // WIN32: %B2 = getelementptr inbounds nuw %struct.S1, ptr %B, i32 0, i32 2 // LIN32: %B2 = getelementptr inbounds nuw %struct.S1, ptr %B, i32 0, i32 1 - // CHECK: %0 = load i32, ptr %a.addr, align 4 - // CHECK: %conv = sext i32 %0 to i129 + // CHECK: %[[V1:.+]] = load i32, ptr %a.addr, align 4 + // CHECK: %conv = sext i32 %[[V1]] to i129 // CHECK64: storedv = sext i129 %conv to i192 // WIN32: storedv = sext i129 %conv to i192 // LIN32: storedv = sext i129 %conv to i160 @@ -102,12 +102,12 @@ int foo(int a) { // CHECK64: %B3 = getelementptr inbounds nuw %struct.S1, ptr %A, i32 0, i32 2 // WIN32: %B3 = getelementptr inbounds nuw %struct.S1, ptr %A, i32 0, i32 2 // LIN32: %B3 = getelementptr inbounds nuw %struct.S1, ptr %A, i32 0, i32 1 - // CHECK64: %1 = load i192, ptr %B3, align 8 - // WIN32: %1 = load i192, ptr %B3, align 8 - // LIN32: %1 = load i160, ptr %B3, align 4 - // CHECK64: %loadedv = trunc i192 %1 to i129 - // WIN32: %loadedv = trunc i192 %1 to i129 - // LIN32: %loadedv = trunc i160 %1 to i129 + // CHECK64: %[[V2:.+]] = load i192, ptr %B3, align 8 + // WIN32: %[[V2:.+]] = load i192, ptr %B3, align 8 + // LIN32: %[[V2:.+]] = load i160, ptr %B3, align 4 + // CHECK64: %loadedv = trunc i192 %[[V2]] to i129 + // WIN32: %loadedv = trunc i192 %[[V2]] to i129 + // LIN32: %loadedv = trunc i160 %[[V2]] to i129 // CHECK: %conv4 = trunc i129 %loadedv to i32 struct S1 A = {1, 170}; struct S1 B = {1, a}; diff --git a/clang/test/CodeGen/flexible-array-init.c b/clang/test/CodeGen/flexible-array-init.c index 15a30c15ac966..17b520fe83094 100644 --- a/clang/test/CodeGen/flexible-array-init.c +++ b/clang/test/CodeGen/flexible-array-init.c @@ -14,11 +14,11 @@ struct { int y[]; } b1 = { { 14, 16 } }; // sizeof(c) == 8, so this global should be at least 8 bytes. 
struct { int x; char c; char y[]; } c = { 1, 2, { 13, 15 } }; -// CHECK: @c ={{.*}} global { i32, i8, [2 x i8] } { i32 1, i8 2, [2 x i8] c"\0D\0F" } +// CHECK: @c ={{.*}} global { i32, i8, [2 x i8], i8 } { i32 1, i8 2, [2 x i8] c"\0D\0F", i8 0 } // sizeof(d) == 8, so this global should be at least 8 bytes. struct __attribute((packed, aligned(4))) { char a; int x; char z[]; } d = { 1, 2, { 13, 15 } }; -// CHECK: @d ={{.*}} <{ i8, i32, [2 x i8], i8 }> <{ i8 1, i32 2, [2 x i8] c"\0D\0F", i8 undef }>, +// CHECK: @d ={{.*}} <{ i8, i32, [2 x i8], i8 }> <{ i8 1, i32 2, [2 x i8] c"\0D\0F", i8 0 }>, // This global needs 9 bytes to hold all the flexible array members. struct __attribute((packed, aligned(4))) { char a; int x; char z[]; } e = { 1, 2, { 13, 15, 17, 19 } }; @@ -55,21 +55,21 @@ struct { int a; union { int b; short x[]; }; int c; int d; } hf = {1, 2, {}, 3}; // First member is the potential flexible array, initialization requires braces. struct { int a; union { short x; int b; }; int c; int d; } i = {1, 2, {}, 3}; -// CHECK: @i = global { i32, { i16, [2 x i8] }, i32, i32 } { i32 1, { i16, [2 x i8] } { i16 2, [2 x i8] undef }, i32 0, i32 3 } +// CHECK: @i = global { i32, { i16, [2 x i8] }, i32, i32 } { i32 1, { i16, [2 x i8] } { i16 2, [2 x i8] zeroinitializer }, i32 0, i32 3 } struct { int a; union { short x[0]; int b; }; int c; int d; } i0 = {1, {}, 2, 3}; -// CHECK: @i0 = global { i32, { [0 x i16], [4 x i8] }, i32, i32 } { i32 1, { [0 x i16], [4 x i8] } { [0 x i16] zeroinitializer, [4 x i8] undef }, i32 2, i32 3 } +// CHECK: @i0 = global { i32, { [0 x i16], [4 x i8] }, i32, i32 } { i32 1, { [0 x i16], [4 x i8] } zeroinitializer, i32 2, i32 3 } struct { int a; union { short x[1]; int b; }; int c; int d; } i1 = {1, {2}, {}, 3}; -// CHECK: @i1 = global { i32, { [1 x i16], [2 x i8] }, i32, i32 } { i32 1, { [1 x i16], [2 x i8] } { [1 x i16] [i16 2], [2 x i8] undef }, i32 0, i32 3 } +// CHECK: @i1 = global { i32, { [1 x i16], [2 x i8] }, i32, i32 } { i32 1, { [1 x i16], [2 x i8] } { [1 x i16] [i16 2], [2 x i8] zeroinitializer }, i32 0, i32 3 } struct { int a; union { short x[]; int b; }; int c; int d; } i_f = {4, {}, {}, 6}; -// CHECK: @i_f = global { i32, { [0 x i16], [4 x i8] }, i32, i32 } { i32 4, { [0 x i16], [4 x i8] } { [0 x i16] zeroinitializer, [4 x i8] undef }, i32 0, i32 6 } +// CHECK: @i_f = global { i32, { [0 x i16], [4 x i8] }, i32, i32 } { i32 4, { [0 x i16], [4 x i8] } zeroinitializer, i32 0, i32 6 } // Named initializers; order doesn't matter. 
struct { int a; union { int b; short x; }; int c; int d; } hn = {.a = 1, .x = 2, .c = 3}; -// CHECK: @hn = global { i32, { i16, [2 x i8] }, i32, i32 } { i32 1, { i16, [2 x i8] } { i16 2, [2 x i8] undef }, i32 3, i32 0 } +// CHECK: @hn = global { i32, { i16, [2 x i8] }, i32, i32 } { i32 1, { i16, [2 x i8] } { i16 2, [2 x i8] zeroinitializer }, i32 3, i32 0 } struct { int a; union { int b; short x[0]; }; int c; int d; } hn0 = {.a = 1, .x = {2}, .c = 3}; -// CHECK: @hn0 = global { i32, { [0 x i16], [4 x i8] }, i32, i32 } { i32 1, { [0 x i16], [4 x i8] } { [0 x i16] zeroinitializer, [4 x i8] undef }, i32 3, i32 0 } +// CHECK: @hn0 = global { i32, { [0 x i16], [4 x i8] }, i32, i32 } { i32 1, { [0 x i16], [4 x i8] } zeroinitializer, i32 3, i32 0 } struct { int a; union { int b; short x[1]; }; int c; int d; } hn1 = {.a = 1, .x = {2}, .c = 3}; -// CHECK: @hn1 = global { i32, { [1 x i16], [2 x i8] }, i32, i32 } { i32 1, { [1 x i16], [2 x i8] } { [1 x i16] [i16 2], [2 x i8] undef }, i32 3, i32 0 } +// CHECK: @hn1 = global { i32, { [1 x i16], [2 x i8] }, i32, i32 } { i32 1, { [1 x i16], [2 x i8] } { [1 x i16] [i16 2], [2 x i8] zeroinitializer }, i32 3, i32 0 } struct { char a[]; } empty_struct = {}; // CHECK: @empty_struct ={{.*}} global %struct.anon{{.*}} zeroinitializer, align 1 @@ -96,10 +96,10 @@ union { char a[]; } only_in_union0 = {0}; // CHECK: @only_in_union0 = global { [1 x i8] } zeroinitializer, align 1 union { char a[]; int b; } first_in_union = {}; -// CHECK: @first_in_union = global { [0 x i8], [4 x i8] } { [0 x i8] zeroinitializer, [4 x i8] undef }, align 4 +// CHECK: @first_in_union = global { [0 x i8], [4 x i8] } zeroinitializer, align 4 union { char a[]; int b; } first_in_union0 = {0}; -// CHECK: @first_in_union0 = global { [1 x i8], [3 x i8] } { [1 x i8] zeroinitializer, [3 x i8] undef }, align 4 +// CHECK: @first_in_union0 = global { [1 x i8], [3 x i8] } zeroinitializer, align 4 union { char a[]; int b; } first_in_union123 = { {1, 2, 3} }; -// CHECK: @first_in_union123 = global { [3 x i8], i8 } { [3 x i8] c"\01\02\03", i8 undef }, align 4 +// CHECK: @first_in_union123 = global { [3 x i8], i8 } { [3 x i8] c"\01\02\03", i8 0 }, align 4 diff --git a/clang/test/CodeGen/global-init.c b/clang/test/CodeGen/global-init.c index 7f1d675b97c09..b156466dbaaff 100644 --- a/clang/test/CodeGen/global-init.c +++ b/clang/test/CodeGen/global-init.c @@ -33,7 +33,7 @@ struct ManyFields { int f; }; -// CHECK: global %struct.ManyFields { i32 1, i32 2, i32 0, i8 0, i32 0, i32 0 } +// CHECK: global { i32, i32, i32, i8, [3 x i8], i32, i32 } { i32 1, i32 2, i32 0, i8 0, [3 x i8] zeroinitializer, i32 0, i32 0 } struct ManyFields FewInits = {1, 2}; diff --git a/clang/test/CodeGen/init.c b/clang/test/CodeGen/init.c index cbf615bb9ddfe..27f427dff8f79 100644 --- a/clang/test/CodeGen/init.c +++ b/clang/test/CodeGen/init.c @@ -187,25 +187,6 @@ void nonzeroMemsetf64(void) { // CHECK: call void @llvm.memset.p0.i32(ptr {{.*}}, i8 68, i32 56, i1 false) } -void nonzeroPaddedUnionMemset(void) { - union U { char c; int i; }; - union U arr[9] = { 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, }; - // CHECK-LABEL: @nonzeroPaddedUnionMemset( - // CHECK-NOT: store - // CHECK-NOT: memcpy - // CHECK: call void @llvm.memset.p0.i32(ptr {{.*}}, i8 -16, i32 36, i1 false) -} - -void nonzeroNestedMemset(void) { - union U { char c; int i; }; - struct S { union U u; short i; }; - struct S arr[5] = { { {0xF0}, 0xF0F0 }, { {0xF0}, 0xF0F0 }, { {0xF0}, 0xF0F0 }, { {0xF0}, 0xF0F0 }, { {0xF0}, 0xF0F0 }, }; - // CHECK-LABEL: 
@nonzeroNestedMemset( - // CHECK-NOT: store - // CHECK-NOT: memcpy - // CHECK: call void @llvm.memset.p0.i32(ptr {{.*}}, i8 -16, i32 40, i1 false) -} - // PR9257 struct test11S { int A[10]; diff --git a/clang/test/CodeGen/linux-kernel-struct-union-initializer.c b/clang/test/CodeGen/linux-kernel-struct-union-initializer.c new file mode 100644 index 0000000000000..dc68cc0f454c8 --- /dev/null +++ b/clang/test/CodeGen/linux-kernel-struct-union-initializer.c @@ -0,0 +1,267 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --check-globals all --version 5 +// RUN: %clang_cc1 -triple x86_64-unknown-unknown -std=gnu11 -verify -emit-llvm %s -o - | FileCheck %s +// expected-no-diagnostics + +union U1 { + int x; + char y[16]; +}; + +struct S1 { + int x; + union U1 y; +}; + +union U2 { + int x; + char y[16]; +} __attribute__((__aligned__(32))); + +struct S2 { + int x; + long long y; + char z[8]; +} __attribute__((__aligned__(32))); + +union U1 global_u1 = {}; + +union U1 global_u2 = {3}; + +union U1 global_u2_from_cast = (union U1)3; + +struct S1 global_s1 = {}; + +struct S1 global_s2 = { + .x = 3, +}; + +struct S1 global_s3 = {.x = 3, .y = {.x = 6}}; + +const union U1 global_const_u1 = {4}; +struct S1 global_s3_from_const_u1 = {.y = global_const_u1}; + +union U2 global_u3 = {}; + +struct S2 global_s4 = {}; + +struct S2 global_s5 = {.x = 1}; + + +// Test empty initializer for union. +//. +// CHECK: @global_u1 = global %union.U1 zeroinitializer, align 4 +// CHECK: @global_u2 = global %union.U1 { i32 3, [12 x i8] zeroinitializer }, align 4 +// CHECK: @global_u2_from_cast = global { i32, [12 x i8] } { i32 3, [12 x i8] zeroinitializer }, align 4 +// CHECK: @global_s1 = global %struct.S1 zeroinitializer, align 4 +// CHECK: @global_s2 = global %struct.S1 { i32 3, %union.U1 zeroinitializer }, align 4 +// CHECK: @global_s3 = global %struct.S1 { i32 3, %union.U1 { i32 6, [12 x i8] zeroinitializer } }, align 4 +// CHECK: @global_const_u1 = constant %union.U1 { i32 4, [12 x i8] zeroinitializer }, align 4 +// CHECK: @global_s3_from_const_u1 = global %struct.S1 { i32 0, %union.U1 { i32 4, [12 x i8] zeroinitializer } }, align 4 +// CHECK: @global_u3 = global %union.U2 zeroinitializer, align 32 +// CHECK: @global_s4 = global { i32, [4 x i8], i64, [8 x i8], [8 x i8] } zeroinitializer, align 32 +// CHECK: @global_s5 = global { i32, [4 x i8], i64, [8 x i8], [8 x i8] } { i32 1, [4 x i8] zeroinitializer, i64 0, [8 x i8] zeroinitializer, [8 x i8] zeroinitializer }, align 32 +// CHECK: @test2.a = internal global %union.U1 zeroinitializer, align 4 +// CHECK: @__const.test3.a = private unnamed_addr constant %union.U1 { i32 3, [12 x i8] zeroinitializer }, align 4 +// CHECK: @test4.a = internal global %union.U1 { i32 3, [12 x i8] zeroinitializer }, align 4 +// CHECK: @test6.s = internal global %struct.S1 zeroinitializer, align 4 +// CHECK: @__const.test7.s = private unnamed_addr constant %struct.S1 { i32 3, %union.U1 zeroinitializer }, align 4 +// CHECK: @test8.s = internal global %struct.S1 { i32 3, %union.U1 zeroinitializer }, align 4 +// CHECK: @__const.test9.s = private unnamed_addr constant %struct.S1 { i32 3, %union.U1 { i32 6, [12 x i8] zeroinitializer } }, align 4 +// CHECK: @test10.s = internal global %struct.S1 { i32 3, %union.U1 { i32 6, [12 x i8] zeroinitializer } }, align 4 +// CHECK: @test12.a = internal global %union.U2 zeroinitializer, align 32 +// CHECK: @test14.s = internal global { i32, [4 x i8], i64, [8 x i8], [8 x i8] } zeroinitializer, align 32 +// CHECK: 
@__const.test15.s = private unnamed_addr constant { i32, [4 x i8], i64, [8 x i8], [8 x i8] } { i32 1, [4 x i8] zeroinitializer, i64 0, [8 x i8] zeroinitializer, [8 x i8] zeroinitializer }, align 32 +// CHECK: @test16.s = internal global { i32, [4 x i8], i64, [8 x i8], [8 x i8] } { i32 1, [4 x i8] zeroinitializer, i64 0, [8 x i8] zeroinitializer, [8 x i8] zeroinitializer }, align 32 +//. +// CHECK-LABEL: define dso_local void @test1( +// CHECK-SAME: ) #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[A:%.*]] = alloca [[UNION_U1:%.*]], align 4 +// CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 4 [[A]], i8 0, i64 16, i1 false) +// CHECK-NEXT: ret void +// +void test1() { + union U1 a = {}; +} + +// Test empty initializer for union. Use static variable. +// CHECK-LABEL: define dso_local void @test2( +// CHECK-SAME: ) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: ret void +// +void test2() { + static union U1 a = {}; +} + +// Test only initializing a small field for union. +// CHECK-LABEL: define dso_local void @test3( +// CHECK-SAME: ) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[A:%.*]] = alloca [[UNION_U1:%.*]], align 4 +// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[A]], ptr align 4 @__const.test3.a, i64 16, i1 false) +// CHECK-NEXT: ret void +// +void test3() { + union U1 a = {3}; +} + +// Test only initializing a small field for union. Use static variable. +// CHECK-LABEL: define dso_local void @test4( +// CHECK-SAME: ) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: ret void +// +void test4() { + static union U1 a = {3}; +} + +// Test union in struct. Use empty initializer for the struct. +// CHECK-LABEL: define dso_local void @test5( +// CHECK-SAME: ) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[S:%.*]] = alloca [[STRUCT_S1:%.*]], align 4 +// CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 4 [[S]], i8 0, i64 20, i1 false) +// CHECK-NEXT: ret void +// +void test5() { + struct S1 s = {}; +} + +// Test union in struct. Use empty initializer for the struct. Use static variable. +// CHECK-LABEL: define dso_local void @test6( +// CHECK-SAME: ) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: ret void +// +void test6() { + static struct S1 s = {}; +} + +// Test union in struct. Initialize other fields of the struct. +// CHECK-LABEL: define dso_local void @test7( +// CHECK-SAME: ) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[S:%.*]] = alloca [[STRUCT_S1:%.*]], align 4 +// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[S]], ptr align 4 @__const.test7.s, i64 20, i1 false) +// CHECK-NEXT: ret void +// +void test7() { + struct S1 s = { + .x = 3, + }; +} + +// Test union in struct. Initialize other fields of the struct. Use static variable. +// CHECK-LABEL: define dso_local void @test8( +// CHECK-SAME: ) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: ret void +// +void test8() { + static struct S1 s = { + .x = 3, + }; +} + +// Test union in struct. Initialize a small field for union. +// CHECK-LABEL: define dso_local void @test9( +// CHECK-SAME: ) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[S:%.*]] = alloca [[STRUCT_S1:%.*]], align 4 +// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[S]], ptr align 4 @__const.test9.s, i64 20, i1 false) +// CHECK-NEXT: ret void +// +void test9() { + struct S1 s = {.x = 3, + .y = { + .x = 6, + }}; +} + +// Test union in struct. Initialize a small field for union. Use static variable. 
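+// With static storage the padded initializer is emitted as a constant global
+// (see @test10.s in the globals above), so no runtime stores are expected in
+// the body below.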
+// CHECK-LABEL: define dso_local void @test10( +// CHECK-SAME: ) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: ret void +// +void test10() { + static struct S1 s = {.x = 3, + .y = { + .x = 6, + }}; +} + +// Test empty initializer for union with padding. +// CHECK-LABEL: define dso_local void @test11( +// CHECK-SAME: ) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[A:%.*]] = alloca [[UNION_U2:%.*]], align 32 +// CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 32 [[A]], i8 0, i64 32, i1 false) +// CHECK-NEXT: ret void +// +void test11() { + union U2 a = {}; +} + +// Test empty initializer for union with padding. Use static variable. +// CHECK-LABEL: define dso_local void @test12( +// CHECK-SAME: ) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: ret void +// +void test12() { + static union U2 a = {}; +} + +// Test empty initializer for struct with padding. +// CHECK-LABEL: define dso_local void @test13( +// CHECK-SAME: ) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[S:%.*]] = alloca [[STRUCT_S2:%.*]], align 32 +// CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 32 [[S]], i8 0, i64 32, i1 false) +// CHECK-NEXT: ret void +// +void test13() { + struct S2 s = {}; +} + +// Test empty initializer for struct with padding. Use static variable. +// CHECK-LABEL: define dso_local void @test14( +// CHECK-SAME: ) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: ret void +// +void test14() { + static struct S2 s = {}; +} + +// Test partial initialization for struct with padding. +// CHECK-LABEL: define dso_local void @test15( +// CHECK-SAME: ) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[S:%.*]] = alloca [[STRUCT_S2:%.*]], align 32 +// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 32 [[S]], ptr align 32 @__const.test15.s, i64 32, i1 false) +// CHECK-NEXT: ret void +// +void test15() { + struct S2 s = {.x = 1}; +} + +// Test partial initialization for struct with padding. Use static variable. +// CHECK-LABEL: define dso_local void @test16( +// CHECK-SAME: ) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: ret void +// +void test16() { + static struct S2 s = {.x = 1}; +} +//. +// CHECK: attributes #[[ATTR0]] = { noinline nounwind optnone "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+cx8,+mmx,+sse,+sse2,+x87" } +// CHECK: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: write) } +// CHECK: attributes #[[ATTR2:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) } +//. +// CHECK: [[META0:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} +// CHECK: [[META1:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"} +//. 
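The companion test below exercises the non-constant initializer paths, where
the padding cannot be folded into a constant global and is instead zeroed at
run time with explicit memset calls. A minimal sketch of the pattern being
verified (names are illustrative; sizes assume the x86_64 layout used by the
test):

    union U1 { int x; char y[5]; };  /* 8 bytes including trailing padding */
    void f(int v) {
      union U1 a = {v};  /* store v, then memset the trailing 4 bytes to 0 */
    }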
diff --git a/clang/test/CodeGen/linux-kernel-struct-union-initializer2.c b/clang/test/CodeGen/linux-kernel-struct-union-initializer2.c new file mode 100644 index 0000000000000..0a1ad3a369eac --- /dev/null +++ b/clang/test/CodeGen/linux-kernel-struct-union-initializer2.c @@ -0,0 +1,140 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5 +// RUN: %clang_cc1 -triple x86_64-unknown-unknown -std=gnu11 -verify -emit-llvm %s -o - | FileCheck %s +// expected-no-diagnostics + +union U1 { + int x; + char y[5]; +}; + +struct S1 { + int x; + long long y; +}; + +struct S2 { + unsigned char b1 : 3; // 1st 3 bits (in 1st byte) are b1 + unsigned char : 2; // next 2 bits (in 1st byte) are blocked out as unused + unsigned char b2 : 6; // 6 bits for b2 - doesn't fit into the 1st byte => starts a 2nd + unsigned char b3 : 2; // 2 bits for b3 - next (and final) bits in the 2nd byte + int i; +}; + +struct S3 { + int x; +} __attribute__((__aligned__(8))); + +struct S4 { + int a; + union U1 b; +}; + +// Test non-const initializer for union with padding. +// CHECK-LABEL: define dso_local void @test1( +// CHECK-SAME: i32 noundef [[X:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[X_ADDR:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[A:%.*]] = alloca [[UNION_U1:%.*]], align 4 +// CHECK-NEXT: store i32 [[X]], ptr [[X_ADDR]], align 4 +// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[X_ADDR]], align 4 +// CHECK-NEXT: store i32 [[TMP0]], ptr [[A]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[A]], i64 4 +// CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 4 [[TMP1]], i8 0, i64 4, i1 false) +// CHECK-NEXT: ret void +// +void test1(int x) { + union U1 a = {x}; +} + +// Test non-const initializer for struct with padding. +// CHECK-LABEL: define dso_local void @test2( +// CHECK-SAME: i64 noundef [[Y:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[Y_ADDR:%.*]] = alloca i64, align 8 +// CHECK-NEXT: [[S:%.*]] = alloca [[STRUCT_S1:%.*]], align 8 +// CHECK-NEXT: store i64 [[Y]], ptr [[Y_ADDR]], align 8 +// CHECK-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_S1]], ptr [[S]], i32 0, i32 0 +// CHECK-NEXT: store i32 0, ptr [[X]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[S]], i64 4 +// CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 4 [[TMP0]], i8 0, i64 4, i1 false) +// CHECK-NEXT: [[Y1:%.*]] = getelementptr inbounds nuw [[STRUCT_S1]], ptr [[S]], i32 0, i32 1 +// CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[Y_ADDR]], align 8 +// CHECK-NEXT: store i64 [[TMP1]], ptr [[Y1]], align 8 +// CHECK-NEXT: ret void +// +void test2(long long y) { + struct S1 s = {.y = y}; +} + +// Test non-const initializer for struct with padding and bit fields. 
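+// The three bit-fields share a single 16-bit storage unit at offset 0, so
+// the two padding bytes before the `int i` member are expected to be cleared
+// with a 2-byte memset (see the CHECK lines below).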
+// CHECK-LABEL: define dso_local void @test3( +// CHECK-SAME: i8 noundef zeroext [[B:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[B_ADDR:%.*]] = alloca i8, align 1 +// CHECK-NEXT: [[S:%.*]] = alloca [[STRUCT_S2:%.*]], align 4 +// CHECK-NEXT: store i8 [[B]], ptr [[B_ADDR]], align 1 +// CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[B_ADDR]], align 1 +// CHECK-NEXT: [[TMP1:%.*]] = zext i8 [[TMP0]] to i16 +// CHECK-NEXT: [[BF_LOAD:%.*]] = load i16, ptr [[S]], align 4 +// CHECK-NEXT: [[BF_VALUE:%.*]] = and i16 [[TMP1]], 7 +// CHECK-NEXT: [[BF_CLEAR:%.*]] = and i16 [[BF_LOAD]], -8 +// CHECK-NEXT: [[BF_SET:%.*]] = or i16 [[BF_CLEAR]], [[BF_VALUE]] +// CHECK-NEXT: store i16 [[BF_SET]], ptr [[S]], align 4 +// CHECK-NEXT: [[BF_LOAD1:%.*]] = load i16, ptr [[S]], align 4 +// CHECK-NEXT: [[BF_CLEAR2:%.*]] = and i16 [[BF_LOAD1]], -16129 +// CHECK-NEXT: [[BF_SET3:%.*]] = or i16 [[BF_CLEAR2]], 0 +// CHECK-NEXT: store i16 [[BF_SET3]], ptr [[S]], align 4 +// CHECK-NEXT: [[BF_LOAD4:%.*]] = load i16, ptr [[S]], align 4 +// CHECK-NEXT: [[BF_CLEAR5:%.*]] = and i16 [[BF_LOAD4]], 16383 +// CHECK-NEXT: [[BF_SET6:%.*]] = or i16 [[BF_CLEAR5]], 0 +// CHECK-NEXT: store i16 [[BF_SET6]], ptr [[S]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[S]], i64 2 +// CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 2 [[TMP2]], i8 0, i64 2, i1 false) +// CHECK-NEXT: [[I:%.*]] = getelementptr inbounds nuw [[STRUCT_S2]], ptr [[S]], i32 0, i32 1 +// CHECK-NEXT: store i32 0, ptr [[I]], align 4 +// CHECK-NEXT: ret void +// +void test3(unsigned char b) { + struct S2 s = {.b1 = b}; +} + +// Test non-const initializer for struct with padding at the end of the struct. +// CHECK-LABEL: define dso_local void @test4( +// CHECK-SAME: i32 noundef [[X:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[X_ADDR:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[S:%.*]] = alloca [[STRUCT_S3:%.*]], align 8 +// CHECK-NEXT: store i32 [[X]], ptr [[X_ADDR]], align 4 +// CHECK-NEXT: [[X1:%.*]] = getelementptr inbounds nuw [[STRUCT_S3]], ptr [[S]], i32 0, i32 0 +// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[X_ADDR]], align 4 +// CHECK-NEXT: store i32 [[TMP0]], ptr [[X1]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[S]], i64 4 +// CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 4 [[TMP1]], i8 0, i64 4, i1 false) +// CHECK-NEXT: ret void +// +void test4(int x) { + struct S3 s = {x}; +} + +// Test non-const initializer for union in struct. 
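+// Only the union's int member is written explicitly, so the trailing 4 bytes
+// of `union U1 b` are expected to be zeroed with a 4-byte memset (see the
+// CHECK lines below).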
+// CHECK-LABEL: define dso_local void @test5( +// CHECK-SAME: i32 noundef [[A:%.*]], i32 noundef [[B:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[S:%.*]] = alloca [[STRUCT_S4:%.*]], align 4 +// CHECK-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 +// CHECK-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4 +// CHECK-NEXT: [[A1:%.*]] = getelementptr inbounds nuw [[STRUCT_S4]], ptr [[S]], i32 0, i32 0 +// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK-NEXT: store i32 [[TMP0]], ptr [[A1]], align 4 +// CHECK-NEXT: [[B2:%.*]] = getelementptr inbounds nuw [[STRUCT_S4]], ptr [[S]], i32 0, i32 1 +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[B_ADDR]], align 4 +// CHECK-NEXT: store i32 [[TMP1]], ptr [[B2]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[B2]], i64 4 +// CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 4 [[TMP2]], i8 0, i64 4, i1 false) +// CHECK-NEXT: ret void +// +void test5(int a, int b) { + struct S4 s = {a, {b}}; +} diff --git a/clang/test/CodeGen/mingw-long-double.c b/clang/test/CodeGen/mingw-long-double.c index 4be97526f9631..0fc8f01509682 100644 --- a/clang/test/CodeGen/mingw-long-double.c +++ b/clang/test/CodeGen/mingw-long-double.c @@ -11,12 +11,9 @@ struct { char c; long double ldb; } agggregate_LD = {}; -// GNU32: %struct.anon = type { i8, x86_fp80 } -// GNU32: @agggregate_LD = dso_local global %struct.anon zeroinitializer, align 4 -// GNU64: %struct.anon = type { i8, x86_fp80 } -// GNU64: @agggregate_LD = dso_local global %struct.anon zeroinitializer, align 16 -// MSC64: %struct.anon = type { i8, double } -// MSC64: @agggregate_LD = dso_local global %struct.anon zeroinitializer, align 8 +// GNU32: @agggregate_LD = dso_local global { i8, [3 x i8], x86_fp80 } zeroinitializer, align 4 +// GNU64: @agggregate_LD = dso_local global { i8, [15 x i8], x86_fp80 } zeroinitializer, align 16 +// MSC64: @agggregate_LD = dso_local global { i8, [7 x i8], double } zeroinitializer, align 8 long double dataLD = 1.0L; // GNU32: @dataLD = dso_local global x86_fp80 0xK3FFF8000000000000000, align 4 diff --git a/clang/test/CodeGen/mms-bitfields.c b/clang/test/CodeGen/mms-bitfields.c index 49c5c1c3e7d40..2ccce326c7131 100644 --- a/clang/test/CodeGen/mms-bitfields.c +++ b/clang/test/CodeGen/mms-bitfields.c @@ -61,5 +61,5 @@ union HEADER { struct Inner variable = { 1,0,1, 21 }; union HEADER hdr = {{1,2,3,4}}; -// CHECK: @variable ={{.*}} global { i8, [3 x i8], i8, i8, i8, i8 } { i8 5, [3 x i8] undef, i8 21, i8 0, i8 0, i8 0 }, align 1 -// CHECK: @hdr ={{.*}} global { { i8, i8, [2 x i8], i8, i8, i8, i8, i8, [3 x i8] } } { { i8, i8, [2 x i8], i8, i8, i8, i8, i8, [3 x i8] } { i8 8, i8 0, [2 x i8] undef, i8 2, i8 0, i8 0, i8 3, i8 4, [3 x i8] undef } }, align 1 +// CHECK: @variable ={{.*}} global { i8, [3 x i8], i8, i8, i8, i8 } { i8 5, [3 x i8] zeroinitializer, i8 21, i8 0, i8 0, i8 0 }, align 1 +// CHECK: @hdr ={{.*}} global { { i8, i8, [2 x i8], i8, i8, i8, i8, i8, [3 x i8] } } { { i8, i8, [2 x i8], i8, i8, i8, i8, i8, [3 x i8] } { i8 8, i8 0, [2 x i8] zeroinitializer, i8 2, i8 0, i8 0, i8 3, i8 4, [3 x i8] zeroinitializer } }, align 1 diff --git a/clang/test/CodeGen/union-init2.c b/clang/test/CodeGen/union-init2.c index 048ff00517b4e..ee35e78a4f301 100644 --- a/clang/test/CodeGen/union-init2.c +++ b/clang/test/CodeGen/union-init2.c @@ -2,11 +2,11 @@ // RUN: %clang_cc1 -x c++ %s -emit-llvm -triple x86_64-linux-gnu -o - | FileCheck 
%s --check-prefixes=CHECK-CXX // Make sure we generate something sane instead of a ptrtoint -// CHECK: @r, [4 x i8] undef +// CHECK: @r, [4 x i8] zeroinitializer union x {long long b;union x* a;} r = {.a = &r}; -// CHECK: global { [3 x i8], [5 x i8] } { [3 x i8] zeroinitializer, [5 x i8] undef } +// CHECK: global { [3 x i8], [5 x i8] } zeroinitializer union z { char a[3]; long long b; diff --git a/clang/test/CodeGen/windows-swiftcall.c b/clang/test/CodeGen/windows-swiftcall.c index bc7832d9d3ac2..41569c2606622 100644 --- a/clang/test/CodeGen/windows-swiftcall.c +++ b/clang/test/CodeGen/windows-swiftcall.c @@ -5,8 +5,6 @@ #define ERROR __attribute__((swift_error_result)) #define CONTEXT __attribute__((swift_context)) -// CHECK: [[STRUCT2_RESULT:@.*]] = private {{.*}} constant [[STRUCT2_TYPE:%.*]] { i32 0, i8 0, i8 undef, i8 0, i32 0, i32 0 } - /*****************************************************************************/ /****************************** PARAMETER ABIS *******************************/ /*****************************************************************************/ @@ -142,8 +140,8 @@ typedef struct { } struct_2; TEST(struct_2); // CHECK-LABEL: define dso_local swiftcc { i64, i64 } @return_struct_2() {{.*}}{ -// CHECK: [[RET:%.*]] = alloca [[STRUCT2_TYPE]], align 4 -// CHECK: call void @llvm.memcpy{{.*}}({{.*}}[[RET]], {{.*}}[[STRUCT2_RESULT]] +// CHECK: [[RET:%.*]] = alloca [[STRUCT2:%.*]], align 4 +// CHECK: call void @llvm.memset // CHECK: [[GEP0:%.*]] = getelementptr inbounds nuw { i64, i64 }, ptr [[RET]], i32 0, i32 0 // CHECK: [[T0:%.*]] = load i64, ptr [[GEP0]], align 4 // CHECK: [[GEP1:%.*]] = getelementptr inbounds nuw { i64, i64 }, ptr [[RET]], i32 0, i32 1 @@ -153,7 +151,7 @@ TEST(struct_2); // CHECK: ret { i64, i64 } [[R1]] // CHECK: } // CHECK-LABEL: define dso_local swiftcc void @take_struct_2(i64 %0, i64 %1) {{.*}}{ -// CHECK: [[V:%.*]] = alloca [[STRUCT:%.*]], align 4 +// CHECK: [[V:%.*]] = alloca [[STRUCT2]], align 4 // CHECK: [[GEP0:%.*]] = getelementptr inbounds nuw { i64, i64 }, ptr [[V]], i32 0, i32 0 // CHECK: store i64 %0, ptr [[GEP0]], align 4 // CHECK: [[GEP1:%.*]] = getelementptr inbounds nuw { i64, i64 }, ptr [[V]], i32 0, i32 1 @@ -161,7 +159,7 @@ TEST(struct_2); // CHECK: ret void // CHECK: } // CHECK-LABEL: define dso_local void @test_struct_2() {{.*}} { -// CHECK: [[TMP:%.*]] = alloca [[STRUCT2_TYPE]], align 4 +// CHECK: [[TMP:%.*]] = alloca [[STRUCT2]], align 4 // CHECK: [[CALL:%.*]] = call swiftcc { i64, i64 } @return_struct_2() // CHECK: [[GEP:%.*]] = getelementptr inbounds nuw {{.*}} [[TMP]], i32 0, i32 0 // CHECK: [[T0:%.*]] = extractvalue { i64, i64 } [[CALL]], 0 @@ -234,7 +232,7 @@ typedef union { TEST(union_het_fp) // CHECK-LABEL: define dso_local swiftcc i64 @return_union_het_fp() // CHECK: [[RET:%.*]] = alloca [[UNION:%.*]], align 8 -// CHECK: call void @llvm.memcpy{{.*}}(ptr align {{[0-9]+}} [[RET]] +// CHECK: call void @llvm.memset{{.*}}(ptr align {{[0-9]+}} [[RET]] // CHECK: [[GEP:%.*]] = getelementptr inbounds nuw { i64 }, ptr [[RET]], i32 0, i32 0 // CHECK: [[R0:%.*]] = load i64, ptr [[GEP]], align 8 // CHECK: ret i64 [[R0]] diff --git a/clang/test/CodeGenObjC/designated-initializers.m b/clang/test/CodeGenObjC/designated-initializers.m index a67f82e1afbea..ce58f6c367338 100644 --- a/clang/test/CodeGenObjC/designated-initializers.m +++ b/clang/test/CodeGenObjC/designated-initializers.m @@ -4,4 +4,4 @@ char L[3]; int M; } overwrite_string[] = { { { @encode(void**) }, 1 }, [0].L[1] = 'x'}; -// CHECK: [3 x i8] c"^xv", i32 1 +// CHECK: 
[3 x i8] c"^xv", i8 0, i32 1 From 489acb2401b51d940fcdbe965d4a5b2d39168b96 Mon Sep 17 00:00:00 2001 From: Alex MacLean Date: Tue, 24 Sep 2024 19:31:40 -0700 Subject: [PATCH 078/212] [NVPTX][NFC] Refactor utilities to use std::optional (#109883) --- llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp | 16 ++- llvm/lib/Target/NVPTX/NVPTXUtilities.cpp | 139 +++++++++------------- llvm/lib/Target/NVPTX/NVPTXUtilities.h | 24 ++-- 3 files changed, 70 insertions(+), 109 deletions(-) diff --git a/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp index 38c51666139a8..9bcc911b6c345 100644 --- a/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp @@ -563,21 +563,19 @@ void NVPTXAsmPrinter::emitKernelFunctionDirectives(const Function &F, O << ".maxntid " << Maxntidx.value_or(1) << ", " << Maxntidy.value_or(1) << ", " << Maxntidz.value_or(1) << "\n"; - unsigned Mincta = 0; - if (getMinCTASm(F, Mincta)) - O << ".minnctapersm " << Mincta << "\n"; + if (const auto Mincta = getMinCTASm(F)) + O << ".minnctapersm " << *Mincta << "\n"; - unsigned Maxnreg = 0; - if (getMaxNReg(F, Maxnreg)) - O << ".maxnreg " << Maxnreg << "\n"; + if (const auto Maxnreg = getMaxNReg(F)) + O << ".maxnreg " << *Maxnreg << "\n"; // .maxclusterrank directive requires SM_90 or higher, make sure that we // filter it out for lower SM versions, as it causes a hard ptxas crash. const NVPTXTargetMachine &NTM = static_cast(TM); const auto *STI = static_cast(NTM.getSubtargetImpl()); - unsigned Maxclusterrank = 0; - if (getMaxClusterRank(F, Maxclusterrank) && STI->getSmVersion() >= 90) - O << ".maxclusterrank " << Maxclusterrank << "\n"; + if (STI->getSmVersion() >= 90) + if (const auto Maxclusterrank = getMaxClusterRank(F)) + O << ".maxclusterrank " << *Maxclusterrank << "\n"; } std::string NVPTXAsmPrinter::getVirtualRegisterName(unsigned Reg) const { diff --git a/llvm/lib/Target/NVPTX/NVPTXUtilities.cpp b/llvm/lib/Target/NVPTX/NVPTXUtilities.cpp index 80361744fd5b6..be1c87d07f4de 100644 --- a/llvm/lib/Target/NVPTX/NVPTXUtilities.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXUtilities.cpp @@ -13,6 +13,7 @@ #include "NVPTXUtilities.h" #include "NVPTX.h" #include "NVPTXTargetMachine.h" +#include "llvm/ADT/StringRef.h" #include "llvm/IR/Constants.h" #include "llvm/IR/Function.h" #include "llvm/IR/GlobalVariable.h" @@ -130,8 +131,8 @@ static void cacheAnnotationFromMD(const Module *m, const GlobalValue *gv) { } } -bool findOneNVVMAnnotation(const GlobalValue *gv, const std::string &prop, - unsigned &retval) { +static std::optional findOneNVVMAnnotation(const GlobalValue *gv, + const std::string &prop) { auto &AC = getAnnotationCache(); std::lock_guard Guard(AC.Lock); const Module *m = gv->getParent(); @@ -140,21 +141,13 @@ bool findOneNVVMAnnotation(const GlobalValue *gv, const std::string &prop, else if (AC.Cache[m].find(gv) == AC.Cache[m].end()) cacheAnnotationFromMD(m, gv); if (AC.Cache[m][gv].find(prop) == AC.Cache[m][gv].end()) - return false; - retval = AC.Cache[m][gv][prop][0]; - return true; -} - -static std::optional -findOneNVVMAnnotation(const GlobalValue &GV, const std::string &PropName) { - unsigned RetVal; - if (findOneNVVMAnnotation(&GV, PropName, RetVal)) - return RetVal; - return std::nullopt; + return std::nullopt; + return AC.Cache[m][gv][prop][0]; } -bool findAllNVVMAnnotation(const GlobalValue *gv, const std::string &prop, - std::vector &retval) { +static bool findAllNVVMAnnotation(const GlobalValue *gv, + const std::string &prop, + std::vector &retval) { auto &AC = 
getAnnotationCache(); std::lock_guard Guard(AC.Lock); const Module *m = gv->getParent(); @@ -168,25 +161,13 @@ bool findAllNVVMAnnotation(const GlobalValue *gv, const std::string &prop, return true; } -bool isTexture(const Value &val) { - if (const GlobalValue *gv = dyn_cast<GlobalValue>(&val)) { - unsigned Annot; - if (findOneNVVMAnnotation(gv, "texture", Annot)) { - assert((Annot == 1) && "Unexpected annotation on a texture symbol"); +static bool globalHasNVVMAnnotation(const Value &V, const std::string &Prop) { + if (const auto *GV = dyn_cast<GlobalValue>(&V)) + if (const auto Annot = findOneNVVMAnnotation(GV, Prop)) { + assert((*Annot == 1) && "Unexpected annotation on a symbol"); return true; } - } - return false; -} -bool isSurface(const Value &val) { - if (const GlobalValue *gv = dyn_cast<GlobalValue>(&val)) { - unsigned Annot; - if (findOneNVVMAnnotation(gv, "surface", Annot)) { - assert((Annot == 1) && "Unexpected annotation on a surface symbol"); - return true; - } - } return false; } @@ -220,71 +201,60 @@ bool isParamGridConstant(const Value &V) { return false; } -bool isSampler(const Value &val) { +bool isTexture(const Value &V) { return globalHasNVVMAnnotation(V, "texture"); } + +bool isSurface(const Value &V) { return globalHasNVVMAnnotation(V, "surface"); } + +bool isSampler(const Value &V) { const char *AnnotationName = "sampler"; - if (const GlobalValue *gv = dyn_cast<GlobalValue>(&val)) { - unsigned Annot; - if (findOneNVVMAnnotation(gv, AnnotationName, Annot)) { - assert((Annot == 1) && "Unexpected annotation on a sampler symbol"); - return true; - } - } - return argHasNVVMAnnotation(val, AnnotationName); + return globalHasNVVMAnnotation(V, AnnotationName) || + argHasNVVMAnnotation(V, AnnotationName); } -bool isImageReadOnly(const Value &val) { - return argHasNVVMAnnotation(val, "rdoimage"); +bool isImageReadOnly(const Value &V) { + return argHasNVVMAnnotation(V, "rdoimage"); } -bool isImageWriteOnly(const Value &val) { - return argHasNVVMAnnotation(val, "wroimage"); +bool isImageWriteOnly(const Value &V) { + return argHasNVVMAnnotation(V, "wroimage"); } -bool isImageReadWrite(const Value &val) { - return argHasNVVMAnnotation(val, "rdwrimage"); +bool isImageReadWrite(const Value &V) { + return argHasNVVMAnnotation(V, "rdwrimage"); } -bool isImage(const Value &val) { - return isImageReadOnly(val) || isImageWriteOnly(val) || isImageReadWrite(val); +bool isImage(const Value &V) { + return isImageReadOnly(V) || isImageWriteOnly(V) || isImageReadWrite(V); } -bool isManaged(const Value &val) { - if(const GlobalValue *gv = dyn_cast<GlobalValue>(&val)) { - unsigned Annot; - if (findOneNVVMAnnotation(gv, "managed", Annot)) { - assert((Annot == 1) && "Unexpected annotation on a managed symbol"); - return true; - } - } - return false; -} +bool isManaged(const Value &V) { return globalHasNVVMAnnotation(V, "managed"); } -std::string getTextureName(const Value &val) { - assert(val.hasName() && "Found texture variable with no name"); - return std::string(val.getName()); +StringRef getTextureName(const Value &V) { + assert(V.hasName() && "Found texture variable with no name"); + return V.getName(); } -std::string getSurfaceName(const Value &val) { - assert(val.hasName() && "Found surface variable with no name"); - return std::string(val.getName()); +StringRef getSurfaceName(const Value &V) { + assert(V.hasName() && "Found surface variable with no name"); + return V.getName(); } -std::string getSamplerName(const Value &val) { - assert(val.hasName() && "Found sampler variable with no name"); - return std::string(val.getName()); +StringRef
getSamplerName(const Value &V) { + assert(V.hasName() && "Found sampler variable with no name"); + return V.getName(); } std::optional<unsigned> getMaxNTIDx(const Function &F) { - return findOneNVVMAnnotation(F, "maxntidx"); + return findOneNVVMAnnotation(&F, "maxntidx"); } std::optional<unsigned> getMaxNTIDy(const Function &F) { - return findOneNVVMAnnotation(F, "maxntidy"); + return findOneNVVMAnnotation(&F, "maxntidy"); } std::optional<unsigned> getMaxNTIDz(const Function &F) { - return findOneNVVMAnnotation(F, "maxntidz"); + return findOneNVVMAnnotation(&F, "maxntidz"); } std::optional<unsigned> getMaxNTID(const Function &F) { @@ -302,20 +272,20 @@ std::optional<unsigned> getMaxNTID(const Function &F) { return std::nullopt; } -bool getMaxClusterRank(const Function &F, unsigned &x) { - return findOneNVVMAnnotation(&F, "maxclusterrank", x); +std::optional<unsigned> getMaxClusterRank(const Function &F) { + return findOneNVVMAnnotation(&F, "maxclusterrank"); } std::optional<unsigned> getReqNTIDx(const Function &F) { - return findOneNVVMAnnotation(F, "reqntidx"); + return findOneNVVMAnnotation(&F, "reqntidx"); } std::optional<unsigned> getReqNTIDy(const Function &F) { - return findOneNVVMAnnotation(F, "reqntidy"); + return findOneNVVMAnnotation(&F, "reqntidy"); } std::optional<unsigned> getReqNTIDz(const Function &F) { - return findOneNVVMAnnotation(F, "reqntidz"); + return findOneNVVMAnnotation(&F, "reqntidz"); } std::optional<unsigned> getReqNTID(const Function &F) { @@ -328,21 +298,20 @@ std::optional<unsigned> getReqNTID(const Function &F) { return std::nullopt; } -bool getMinCTASm(const Function &F, unsigned &x) { - return findOneNVVMAnnotation(&F, "minctasm", x); +std::optional<unsigned> getMinCTASm(const Function &F) { + return findOneNVVMAnnotation(&F, "minctasm"); } -bool getMaxNReg(const Function &F, unsigned &x) { - return findOneNVVMAnnotation(&F, "maxnreg", x); +std::optional<unsigned> getMaxNReg(const Function &F) { + return findOneNVVMAnnotation(&F, "maxnreg"); } bool isKernelFunction(const Function &F) { - unsigned x = 0; - if (!findOneNVVMAnnotation(&F, "kernel", x)) { - // There is no NVVM metadata, check the calling convention - return F.getCallingConv() == CallingConv::PTX_Kernel; - } - return (x == 1); + if (const auto X = findOneNVVMAnnotation(&F, "kernel")) + return (*X == 1); + + // There is no NVVM metadata, check the calling convention + return F.getCallingConv() == CallingConv::PTX_Kernel; } MaybeAlign getAlign(const Function &F, unsigned Index) { diff --git a/llvm/lib/Target/NVPTX/NVPTXUtilities.h b/llvm/lib/Target/NVPTX/NVPTXUtilities.h index 938b9b04b7a44..cf15dff85cbde 100644 --- a/llvm/lib/Target/NVPTX/NVPTXUtilities.h +++ b/llvm/lib/Target/NVPTX/NVPTXUtilities.h @@ -32,11 +32,6 @@ class TargetMachine; void clearAnnotationCache(const Module *); -bool findOneNVVMAnnotation(const GlobalValue *, const std::string &, - unsigned &); -bool findAllNVVMAnnotation(const GlobalValue *, const std::string &, - std::vector<unsigned> &); - bool isTexture(const Value &); bool isSurface(const Value &); bool isSampler(const Value &); @@ -46,23 +41,23 @@ bool isImageWriteOnly(const Value &); bool isImageReadWrite(const Value &); bool isManaged(const Value &); -std::string getTextureName(const Value &); -std::string getSurfaceName(const Value &); -std::string getSamplerName(const Value &); +StringRef getTextureName(const Value &); +StringRef getSurfaceName(const Value &); +StringRef getSamplerName(const Value &); std::optional<unsigned> getMaxNTIDx(const Function &); std::optional<unsigned> getMaxNTIDy(const Function &); std::optional<unsigned> getMaxNTIDz(const Function &); -std::optional<unsigned> getMaxNTID(const Function &F); +std::optional<unsigned>
getMaxNTID(const Function &); std::optional<unsigned> getReqNTIDx(const Function &); std::optional<unsigned> getReqNTIDy(const Function &); std::optional<unsigned> getReqNTIDz(const Function &); std::optional<unsigned> getReqNTID(const Function &); -bool getMaxClusterRank(const Function &, unsigned &); -bool getMinCTASm(const Function &, unsigned &); -bool getMaxNReg(const Function &, unsigned &); +std::optional<unsigned> getMaxClusterRank(const Function &); +std::optional<unsigned> getMinCTASm(const Function &); +std::optional<unsigned> getMaxNReg(const Function &); bool isKernelFunction(const Function &); bool isParamGridConstant(const Value &); @@ -75,10 +70,9 @@ Function *getMaybeBitcastedCallee(const CallBase *CB); inline unsigned promoteScalarArgumentSize(unsigned size) { if (size <= 32) return 32; - else if (size <= 64) + if (size <= 64) return 64; - else - return size; + return size; } bool shouldEmitPTXNoReturn(const Value *V, const TargetMachine &TM); From 4ca4460bae12eefe90bf69704a33bdd5b1c9f142 Mon Sep 17 00:00:00 2001 From: Vitaly Buka Date: Sun, 22 Sep 2024 23:29:45 -0700 Subject: [PATCH 079/212] [hwasan] Add "-hwasan-with-frame-record" (#109620) It should not be implied from mapping settings. No longer disable frame records for fixed offset. --- .../Transforms/Instrumentation/HWAddressSanitizer.cpp | 9 +++++++-- .../Instrumentation/HWAddressSanitizer/RISCV/alloca.ll | 4 ++-- llvm/test/Instrumentation/HWAddressSanitizer/alloca.ll | 8 ++++---- llvm/test/Instrumentation/HWAddressSanitizer/prologue.ll | 4 ++-- 4 files changed, 15 insertions(+), 10 deletions(-) diff --git a/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp index 49d463a07553f..7d3db5c2c8d5c 100644 --- a/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp @@ -187,6 +187,11 @@ static cl::opt<OffsetKind> ClMappingOffsetDynamic( clEnumValN(OffsetKind::kIfunc, "ifunc", "Use ifunc global"), clEnumValN(OffsetKind::kTls, "tls", "Use TLS"))); +static cl::opt<bool> + ClFrameRecords("hwasan-with-frame-record", + cl::desc("Use ring buffer for stack allocations"), + cl::Hidden); + static cl::opt<int> ClHotPercentileCutoff("hwasan-percentile-cutoff-hot", cl::desc("Hot percentile cuttoff.")); @@ -1934,12 +1939,12 @@ void HWAddressSanitizer::ShadowMapping::init(Triple &TargetTriple, SetFixed(0); } else if (ClMappingOffset.getNumOccurrences() > 0) { SetFixed(ClMappingOffset); - WithFrameRecord = false; } else if (ClEnableKhwasan || InstrumentWithCalls) { SetFixed(0); WithFrameRecord = false; } else if (ClMappingOffsetDynamic.getNumOccurrences() > 0) { Kind = ClMappingOffsetDynamic; - WithFrameRecord = isInTls(); } + + WithFrameRecord = optOr(ClFrameRecords, WithFrameRecord); } diff --git a/llvm/test/Instrumentation/HWAddressSanitizer/RISCV/alloca.ll b/llvm/test/Instrumentation/HWAddressSanitizer/RISCV/alloca.ll index 24a89af97cffe..edbcdbeb8516c 100644 --- a/llvm/test/Instrumentation/HWAddressSanitizer/RISCV/alloca.ll +++ b/llvm/test/Instrumentation/HWAddressSanitizer/RISCV/alloca.ll @@ -2,8 +2,8 @@ ; Test alloca instrumentation. Command line includes check-globals so that ; changes to debug-info are detectable.
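As a rough illustration of the behavior this patch introduces, here is a minimal C sketch of the flag resolution. It assumes `optOr` acts like a value-or-default helper over an optional command-line flag; the helper name `resolve_with_frame_record` and its parameters are illustrative, not part of the patch:

```c
/* The flag overrides the default derived from the mapping mode only when
   it was explicitly passed on the command line; this mirrors
   `WithFrameRecord = optOr(ClFrameRecords, WithFrameRecord)` above. */
int resolve_with_frame_record(int flag_occurrences, int flag_value,
                              int derived_default) {
  return flag_occurrences > 0 ? flag_value : derived_default;
}
```

This is also why the RUN lines below now pass `-hwasan-with-frame-record=0` explicitly where the old behavior disabled frame records as a side effect of `-hwasan-mapping-offset`.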
; -; RUN: opt < %s -passes=hwasan -hwasan-mapping-offset-dynamic=ifunc -S | FileCheck %s --check-prefixes=DYNAMIC-SHADOW -; RUN: opt < %s -passes=hwasan -hwasan-mapping-offset=0 -S | FileCheck %s --check-prefixes=ZERO-BASED-SHADOW +; RUN: opt < %s -passes=hwasan -hwasan-mapping-offset-dynamic=ifunc -hwasan-with-frame-record=0 -S | FileCheck %s --check-prefixes=DYNAMIC-SHADOW +; RUN: opt < %s -passes=hwasan -hwasan-mapping-offset=0 -hwasan-with-frame-record=0 -S | FileCheck %s --check-prefixes=ZERO-BASED-SHADOW target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" target triple = "riscv64-unknown-linux" diff --git a/llvm/test/Instrumentation/HWAddressSanitizer/alloca.ll b/llvm/test/Instrumentation/HWAddressSanitizer/alloca.ll index 4d0cce72470b9..451ab9ee184a3 100644 --- a/llvm/test/Instrumentation/HWAddressSanitizer/alloca.ll +++ b/llvm/test/Instrumentation/HWAddressSanitizer/alloca.ll @@ -2,11 +2,11 @@ ; Test alloca instrumentation. Command line includes check-globals so that ; changes to debug-info are detectable. ; -; RUN: opt < %s -passes=hwasan -hwasan-mapping-offset-dynamic=ifunc -S | FileCheck %s --check-prefixes=DYNAMIC-SHADOW -; RUN: opt < %s -passes=hwasan -hwasan-mapping-offset=0 -S | FileCheck %s --check-prefixes=ZERO-BASED-SHADOW +; RUN: opt < %s -passes=hwasan -hwasan-mapping-offset-dynamic=ifunc -hwasan-with-frame-record=0 -S | FileCheck %s --check-prefixes=DYNAMIC-SHADOW +; RUN: opt < %s -passes=hwasan -hwasan-mapping-offset=0 -hwasan-with-frame-record=0 -S | FileCheck %s --check-prefixes=ZERO-BASED-SHADOW -; RUN: opt < %s -passes=hwasan -hwasan-mapping-offset-dynamic=ifunc -S --try-experimental-debuginfo-iterators | FileCheck %s --check-prefixes=DYNAMIC-SHADOW -; RUN: opt < %s -passes=hwasan -hwasan-mapping-offset=0 -S --try-experimental-debuginfo-iterators | FileCheck %s --check-prefixes=ZERO-BASED-SHADOW +; RUN: opt < %s -passes=hwasan -hwasan-mapping-offset-dynamic=ifunc -hwasan-with-frame-record=0 -S --try-experimental-debuginfo-iterators | FileCheck %s --check-prefixes=DYNAMIC-SHADOW +; RUN: opt < %s -passes=hwasan -hwasan-mapping-offset=0 -hwasan-with-frame-record=0 -S --try-experimental-debuginfo-iterators | FileCheck %s --check-prefixes=ZERO-BASED-SHADOW target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" target triple = "aarch64--linux-android10000" diff --git a/llvm/test/Instrumentation/HWAddressSanitizer/prologue.ll b/llvm/test/Instrumentation/HWAddressSanitizer/prologue.ll index 005a11b00c7a5..73fc077c95624 100644 --- a/llvm/test/Instrumentation/HWAddressSanitizer/prologue.ll +++ b/llvm/test/Instrumentation/HWAddressSanitizer/prologue.ll @@ -7,9 +7,9 @@ ; RUN: FileCheck %s --check-prefixes=NOIFUNC-TLS-HISTORY ; RUN: opt -passes=hwasan -S -hwasan-mapping-offset-dynamic=tls -hwasan-record-stack-history=none < %s | \ ; RUN: FileCheck %s --check-prefixes=NOIFUNC-TLS-NOHISTORY -; RUN: opt -passes=hwasan -S -hwasan-mapping-offset-dynamic=global < %s | \ +; RUN: opt -passes=hwasan -S -hwasan-mapping-offset-dynamic=global -hwasan-with-frame-record=0 < %s | \ ; RUN: FileCheck %s --check-prefixes=NOIFUNC-NOTLS -; RUN: opt -passes=hwasan -S -hwasan-mapping-offset-dynamic=ifunc < %s | \ +; RUN: opt -passes=hwasan -S -hwasan-mapping-offset-dynamic=ifunc -hwasan-with-frame-record=0 < %s | \ ; RUN: FileCheck %s --check-prefixes=IFUNC-NOTLS ; RUN: opt -passes=hwasan -S -mtriple=aarch64-fuchsia < %s | \ ; RUN: FileCheck %s --check-prefixes=FUCHSIA From fa627d98e87504b6f6d621a7dab5d140340ed760 Mon Sep 17 00:00:00 2001 From: 
=?UTF-8?q?Valentin=20Clement=20=28=E3=83=90=E3=83=AC=E3=83=B3?= =?UTF-8?q?=E3=82=BF=E3=82=A4=E3=83=B3=20=E3=82=AF=E3=83=AC=E3=83=A1?= =?UTF-8?q?=E3=83=B3=29?= Date: Tue, 24 Sep 2024 20:00:11 -0700 Subject: [PATCH 080/212] [flang][cuda] Add entry point for alloc/free and simple copy (#109867) These will be used to translate simple cuf.alloc/cuf.free and cuf.data_transfer on scalar and constant size arrays. --- flang/include/flang/Runtime/CUDA/common.h | 7 +++ flang/include/flang/Runtime/CUDA/memory.h | 12 +++++ flang/runtime/CUDA/memory.cpp | 52 +++++++++++++++++++++ flang/unittests/Runtime/CUDA/CMakeLists.txt | 1 + flang/unittests/Runtime/CUDA/Memory.cpp | 31 ++++++++++++ 5 files changed, 103 insertions(+) create mode 100644 flang/unittests/Runtime/CUDA/Memory.cpp diff --git a/flang/include/flang/Runtime/CUDA/common.h b/flang/include/flang/Runtime/CUDA/common.h index cb8681da161f0..b73bc390ea8c9 100644 --- a/flang/include/flang/Runtime/CUDA/common.h +++ b/flang/include/flang/Runtime/CUDA/common.h @@ -12,6 +12,13 @@ #include "flang/Runtime/descriptor.h" #include "flang/Runtime/entry-names.h" +/// Type of memory for allocation/deallocation +static constexpr unsigned kMemTypeDevice = 0; +static constexpr unsigned kMemTypeManaged = 1; +static constexpr unsigned kMemTypeUnified = 2; +static constexpr unsigned kMemTypePinned = 3; + +/// Data transfer kinds. static constexpr unsigned kHostToDevice = 0; static constexpr unsigned kDeviceToHost = 1; static constexpr unsigned kDeviceToDevice = 2; diff --git a/flang/include/flang/Runtime/CUDA/memory.h b/flang/include/flang/Runtime/CUDA/memory.h index 33947248dc483..3c3ae73d4ad7a 100644 --- a/flang/include/flang/Runtime/CUDA/memory.h +++ b/flang/include/flang/Runtime/CUDA/memory.h @@ -17,12 +17,24 @@ namespace Fortran::runtime::cuda { extern "C" { +/// Allocate memory on the device. +void *RTDECL(CUFMemAlloc)(std::size_t bytes, unsigned type, + const char *sourceFile = nullptr, int sourceLine = 0); + +/// Free memory allocated on the device. +void RTDECL(CUFMemFree)(void *devicePtr, unsigned type, + const char *sourceFile = nullptr, int sourceLine = 0); + /// Set value to the data hold by a descriptor. The \p value pointer must be /// addressable to the same amount of bytes specified by the element size of /// the descriptor \p desc. void RTDECL(CUFMemsetDescriptor)(const Descriptor &desc, void *value, const char *sourceFile = nullptr, int sourceLine = 0); +/// Data transfer from a pointer to a pointer. +void RTDECL(CUFDataTransferPtrPtr)(void *dst, void *src, std::size_t bytes, + unsigned mode, const char *sourceFile = nullptr, int sourceLine = 0); + /// Data transfer from a pointer to a descriptor. 
void RTDECL(CUFDataTransferDescPtr)(const Descriptor &dst, void *src, std::size_t bytes, unsigned mode, const char *sourceFile = nullptr, diff --git a/flang/runtime/CUDA/memory.cpp b/flang/runtime/CUDA/memory.cpp index a287fa14a4878..fc48b4343eea9 100644 --- a/flang/runtime/CUDA/memory.cpp +++ b/flang/runtime/CUDA/memory.cpp @@ -8,12 +8,47 @@ #include "flang/Runtime/CUDA/memory.h" #include "../terminator.h" +#include "flang/Runtime/CUDA/common.h" #include "cuda_runtime.h" namespace Fortran::runtime::cuda { extern "C" { +void *RTDEF(CUFMemAlloc)( + std::size_t bytes, unsigned type, const char *sourceFile, int sourceLine) { + void *ptr = nullptr; + if (bytes != 0) { + if (type == kMemTypeDevice) { + CUDA_REPORT_IF_ERROR(cudaMalloc((void **)&ptr, bytes)); + } else if (type == kMemTypeManaged || type == kMemTypeUnified) { + CUDA_REPORT_IF_ERROR( + cudaMallocManaged((void **)&ptr, bytes, cudaMemAttachGlobal)); + } else if (type == kMemTypePinned) { + CUDA_REPORT_IF_ERROR(cudaMallocHost((void **)&ptr, bytes)); + } else { + Terminator terminator{sourceFile, sourceLine}; + terminator.Crash("unsupported memory type"); + } + } + return ptr; +} + +void RTDEF(CUFMemFree)( + void *ptr, unsigned type, const char *sourceFile, int sourceLine) { + if (!ptr) + return; + if (type == kMemTypeDevice || type == kMemTypeManaged || + type == kMemTypeUnified) { + CUDA_REPORT_IF_ERROR(cudaFree(ptr)); + } else if (type == kMemTypePinned) { + CUDA_REPORT_IF_ERROR(cudaFreeHost(ptr)); + } else { + Terminator terminator{sourceFile, sourceLine}; + terminator.Crash("unsupported memory type"); + } +} + void RTDEF(CUFMemsetDescriptor)(const Descriptor &desc, void *value, const char *sourceFile, int sourceLine) { Terminator terminator{sourceFile, sourceLine}; @@ -21,6 +56,23 @@ void RTDEF(CUFMemsetDescriptor)(const Descriptor &desc, void *value, "value to a descriptor"); } +void RTDEF(CUFDataTransferPtrPtr)(void *dst, void *src, std::size_t bytes, + unsigned mode, const char *sourceFile, int sourceLine) { + cudaMemcpyKind kind; + if (mode == kHostToDevice) { + kind = cudaMemcpyHostToDevice; + } else if (mode == kDeviceToHost) { + kind = cudaMemcpyDeviceToHost; + } else if (mode == kDeviceToDevice) { + kind = cudaMemcpyDeviceToDevice; + } else { + Terminator terminator{sourceFile, sourceLine}; + terminator.Crash("host to host copy not supported"); + } + // TODO: Use cudaMemcpyAsync when we have support for stream. + CUDA_REPORT_IF_ERROR(cudaMemcpy(dst, src, bytes, kind)); +} + void RTDEF(CUFDataTransferDescPtr)(const Descriptor &desc, void *addr, std::size_t bytes, unsigned mode, const char *sourceFile, int sourceLine) { Terminator terminator{sourceFile, sourceLine}; diff --git a/flang/unittests/Runtime/CUDA/CMakeLists.txt b/flang/unittests/Runtime/CUDA/CMakeLists.txt index 30fb8c220233c..a7fe604d687bd 100644 --- a/flang/unittests/Runtime/CUDA/CMakeLists.txt +++ b/flang/unittests/Runtime/CUDA/CMakeLists.txt @@ -3,6 +3,7 @@ if (FLANG_CUF_RUNTIME) add_flang_unittest(FlangCufRuntimeTests Allocatable.cpp AllocatorCUF.cpp + Memory.cpp ) if (BUILD_SHARED_LIBS) diff --git a/flang/unittests/Runtime/CUDA/Memory.cpp b/flang/unittests/Runtime/CUDA/Memory.cpp new file mode 100644 index 0000000000000..157d3cdb531de --- /dev/null +++ b/flang/unittests/Runtime/CUDA/Memory.cpp @@ -0,0 +1,31 @@ +//===-- flang/unittests/Runtime/Memory.cpp -----------------------*- C++-*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "flang/Runtime/CUDA/memory.h" +#include "gtest/gtest.h" +#include "../../../runtime/terminator.h" +#include "flang/Common/Fortran.h" +#include "flang/Runtime/CUDA/common.h" + +#include "cuda_runtime.h" + +using namespace Fortran::runtime::cuda; + +TEST(MemoryCUFTest, SimpleAllocTramsferFree) { + int *dev = (int *)RTNAME(CUFMemAlloc)( + sizeof(int), kMemTypeDevice, __FILE__, __LINE__); + EXPECT_TRUE(dev != 0); + int host = 42; + RTNAME(CUFDataTransferPtrPtr) + ((void *)dev, (void *)&host, sizeof(int), kHostToDevice, __FILE__, __LINE__); + host = 0; + RTNAME(CUFDataTransferPtrPtr) + ((void *)&host, (void *)dev, sizeof(int), kDeviceToHost, __FILE__, __LINE__); + EXPECT_EQ(42, host); + RTNAME(CUFMemFree)((void *)dev, kMemTypeDevice, __FILE__, __LINE__); +} From 18b9d49ce3370c012fdd04ec87d854d53293f6a6 Mon Sep 17 00:00:00 2001 From: Andrew Rogers Date: Tue, 24 Sep 2024 20:00:34 -0700 Subject: [PATCH 081/212] lldb: get lldb API tests working with newer Android NDKs ## Purpose Running the LLDB API tests against a remote Android target with NDK version r22 or later fails to compile the test inferiors. NDK r21 from 2021 is the most recent NDK that still works with the LLDB API tests. This PR updates the Android make rules to support newer Android NDK versions (r19 and later). ## Overview * Updates and simplifies `Android.rules` to match the newer Android NDK unified toolchain layout introduced in NDK r19 * Sets `OBJCOPY` and `ARCHIVER` env vars, required by a few test cases, to their `llvm-` versions in the unified toolchain * Drops support for pre-2019 Android NDK versions to keep the rules simple * Provides an error message if the tests are run using an incompatible NDK layout ## Problem Details Android introduced a unified tools layout in NDK r19 (2019) and removed the old layout in r22 (2021). Releases r19, r20, and r21 support both the old and new layout side-by-side. More details are in #106270. ## Validation Ran a subset of the LLDB API tests against remote Android targets for the four primary architectures: i386, x86_64, arm, and aarch64. No validation was done against riscv targets. For each case, ran the copy of `lldb-server` from the Android NDK on the device with the latest LLDB test cases in llvm-project. Ran tests with both r19 (the oldest supported) and r26 (more recent, unified layout only) NDK versions. Example test command for aarch64: ``` ./build/bin/lldb-dotest --out-of-tree-debugserver --arch aarch64 --platform-name remote-android --platform-url connect://localhost:5432 --platform-working-dir /data/local/tmp --compiler=$ANDROID_NDK_ROOT/toolchains/llvm/prebuilt/linux-x86_64/bin/clang lldb/test/API/android/ ``` **NOTE: there are a lot of test failures when running the full suite (especially against the 32-bit ARM target). These failures occur independently of this change.** Verified the expected error message appears when attempting to run using NDK r18: ``` Build Command Output: make: Entering directory '/home/andrew/src/llvm/llvm-project/build/lldb-test-build.noindex/android/platform/TestDefaultCacheLineSize.test_cache_line_size' /home/andrew/src/llvm/llvm-project/lldb/packages/Python/lldbsuite/test/make/Android.rules:16: *** "No unified toolchain sysroot found in /home/andrew/Android/Sdk/ndk/18.1.5063045/toolchains/llvm/prebuilt/linux-x86_64/bin/../../../../... NDK must be r19 or later.". Stop.
make: Leaving directory '/home/andrew/src/llvm/llvm-project/build/lldb-test-build.noindex/android/platform/TestDefaultCacheLineSize.test_cache_line_size' ``` ## Impact **This change explicitly removes support for the pre-2019 NDK structure.** Only NDK r19 (from 2019) and later can be used when running the LLDB API tests. If the maintainers object, we can easily support both the old and new NDK toolchain layouts side-by-side at the cost of readability/maintainability. Since this change only impacts tests, I don't see much value in supporting NDKs that are over 5 years old. ## Guidance to Reviewers * I am not an expert on `clang` arguments, so if anything looks off, let me know. * While I personally think supporting 5+ year old NDKs for testing seems unnecessary, please chime in if you are concerned with dropping that support. I can easily revise to support both old and new layouts side-by-side. * If there are any specific tests you'd like me to run, I will do my best to accommodate. It doesn't look like there's much (any?) Android LLDB CI coverage. --- .../Python/lldbsuite/test/make/Android.rules | 86 +++++++------------ 1 file changed, 32 insertions(+), 54 deletions(-) diff --git a/lldb/packages/Python/lldbsuite/test/make/Android.rules b/lldb/packages/Python/lldbsuite/test/make/Android.rules index cd7d8ae74d6bf..44aedf7248419 100644 --- a/lldb/packages/Python/lldbsuite/test/make/Android.rules +++ b/lldb/packages/Python/lldbsuite/test/make/Android.rules @@ -1,81 +1,59 @@ NDK_ROOT := $(shell dirname $(CC))/../../../../.. -ifeq "$(findstring 64, $(ARCH))" "64" - # lowest 64-bit API level - API_LEVEL := 21 -else ifeq "$(ARCH)" "i386" - # clone(2) declaration is present only since this api level - API_LEVEL := 17 +ifeq "$(HOST_OS)" "Linux" + HOST_TAG := linux-x86_64 +else ifeq "$(HOST_OS)" "Darwin" + HOST_TAG := darwin-x86_64 else - # lowest supported 32-bit API level - API_LEVEL := 16 + HOST_TAG := windows-x86_64 +endif + +TOOLCHAIN_ROOT := $(NDK_ROOT)/toolchains/llvm/prebuilt/$(HOST_TAG) +TOOLCHAIN_SYSROOT := $(TOOLCHAIN_ROOT)/sysroot + +OBJCOPY ?= $(TOOLCHAIN_ROOT)/bin/llvm-objcopy +ARCHIVER ?= $(TOOLCHAIN_ROOT)/bin/llvm-ar + +ifeq "$(wildcard $(TOOLCHAIN_SYSROOT)/.)" "" +# Compiling test inferiors for Android requires an NDK with the unified +# toolchain introduced in version r19. +$(error "No unified toolchain sysroot found in $(NDK_ROOT).
NDK must be r19 or later.") endif ifeq "$(ARCH)" "arm" - SYSROOT_ARCH := arm - STL_ARCH := armeabi-v7a TRIPLE := armv7-none-linux-androideabi ARCH_CFLAGS += -march=armv7-a -mfloat-abi=softfp -mfpu=vfpv3-d16 -marm else ifeq "$(ARCH)" "aarch64" - SYSROOT_ARCH := arm64 - STL_ARCH := arm64-v8a TRIPLE := aarch64-none-linux-android else ifeq "$(ARCH)" "i386" - SYSROOT_ARCH := x86 - STL_ARCH := x86 TRIPLE := i686-none-linux-android else - SYSROOT_ARCH := $(ARCH) - STL_ARCH := $(ARCH) TRIPLE := $(ARCH)-none-linux-android endif -ifeq "$(findstring 86,$(ARCH))" "86" - TOOLCHAIN_DIR := $(STL_ARCH)-4.9 -else ifeq "$(ARCH)" "arm" - TOOLCHAIN_DIR := arm-linux-androideabi-4.9 -else - TOOLCHAIN_DIR := $(subst -none,,$(TRIPLE))-4.9 -endif +# lowest 64-bit API level +API_LEVEL := 21 ifeq "$(ARCH)" "arm" - TOOL_PREFIX := arm-linux-androideabi + ARCH_DIR := arm-linux-androideabi else - TOOL_PREFIX := $(subst -none,,$(TRIPLE)) + ARCH_DIR := $(subst -none,,$(TRIPLE)) endif -ifeq "$(HOST_OS)" "Linux" - HOST_TAG := linux-x86_64 -else ifeq "$(HOST_OS)" "Darwin" - HOST_TAG := darwin-x86_64 -else - HOST_TAG := windows-x86_64 -endif - -GCC_TOOLCHAIN = $(NDK_ROOT)/toolchains/$(TOOLCHAIN_DIR)/prebuilt/$(HOST_TAG) - -OBJCOPY ?= $(GCC_TOOLCHAIN)/bin/$(TOOL_PREFIX)-objcopy -ARCHIVER ?= $(GCC_TOOLCHAIN)/bin/$(TOOL_PREFIX)-ar - -ifeq "$(findstring clang,$(CC))" "clang" - ARCH_CFLAGS += -target $(TRIPLE) --gcc-toolchain=$(GCC_TOOLCHAIN) - ARCH_LDFLAGS += -target $(TRIPLE) --gcc-toolchain=$(GCC_TOOLCHAIN) -endif - -ARCH_CFLAGS += --sysroot=$(NDK_ROOT)/sysroot \ - -isystem $(NDK_ROOT)/sysroot/usr/include/$(TOOL_PREFIX) \ - -D__ANDROID_API__=$(API_LEVEL) \ - -isystem $(NDK_ROOT)/platforms/android-$(API_LEVEL)/arch-$(SYSROOT_ARCH)/usr/include - -ARCH_LDFLAGS += --sysroot=$(NDK_ROOT)/platforms/android-$(API_LEVEL)/arch-$(SYSROOT_ARCH) -lm +ARCH_CFLAGS += \ + --target=$(TRIPLE) \ + --sysroot=$(TOOLCHAIN_SYSROOT) \ + -D__ANDROID_API__=$(API_LEVEL) ARCH_CXXFLAGS += \ - -isystem $(NDK_ROOT)/sources/cxx-stl/llvm-libc++/include \ - -isystem $(NDK_ROOT)/sources/android/support/include \ - -isystem $(NDK_ROOT)/sources/cxx-stl/llvm-libc++abi/include + -isystem $(TOOLCHAIN_SYSROOT)/usr/include/c++/v1 ARCH_LDFLAGS += \ - -L$(NDK_ROOT)/sources/cxx-stl/llvm-libc++/libs/$(STL_ARCH) \ - $(NDK_ROOT)/sources/cxx-stl/llvm-libc++/libs/$(STL_ARCH)/libc++_static.a \ + --target=$(TRIPLE) \ + --sysroot=$(TOOLCHAIN_SYSROOT) \ + --prefix=$(TOOLCHAIN_SYSROOT)/usr/lib/$(ARCH_DIR)/$(API_LEVEL) \ + -L$(TOOLCHAIN_SYSROOT)/usr/lib/$(ARCH_DIR)/$(API_LEVEL) \ + $(TOOLCHAIN_SYSROOT)/usr/lib/$(ARCH_DIR)/libc++_static.a \ + -lm \ -lc++abi \ -nostdlib++ From d50eaac12f0cdfe27e942290942b06889ab12a8c Mon Sep 17 00:00:00 2001 From: Eli Friedman Date: Tue, 24 Sep 2024 20:31:54 -0700 Subject: [PATCH 082/212] Revert "[clang][CodeGen] Zero init unspecified fields in initializers in C" (#109898) Reverts llvm/llvm-project#97121 Causing failures on LNT bots; log shows a crash in ConstStructBuilder::BuildStruct. 
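For context before the diff, the documentation removed below spells out the C guarantees being reverted. A short C sketch of the two cases it names (with the reverted patch, the marked bytes were zero-initialized; after this revert they are undef again):

```c
union U { int a; long long b; };
union U u = {};          /* bytes after the 4-byte first member */

struct S1 { int a; long long b; };
struct S2 { char c; struct S1 s1; };
struct S2 s2 = { 'c' };  /* padding after c and between s1.a and s1.b */
```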
--- clang/docs/LanguageExtensions.rst | 23 -- clang/lib/CodeGen/CGExprAgg.cpp | 40 +-- clang/lib/CodeGen/CGExprConstant.cpp | 93 +----- clang/lib/CodeGen/CodeGenModule.h | 51 ---- ...07-22-bitfield-init-after-zero-len-array.c | 2 +- clang/test/CodeGen/2008-08-07-AlignPadding1.c | 4 +- .../CodeGen/2009-06-14-anonymous-union-init.c | 4 +- clang/test/CodeGen/64bit-swiftcall.c | 12 +- clang/test/CodeGen/arm-swiftcall.c | 4 +- clang/test/CodeGen/const-init.c | 4 +- clang/test/CodeGen/decl.c | 4 +- clang/test/CodeGen/designated-initializers.c | 12 +- clang/test/CodeGen/ext-int.c | 18 +- clang/test/CodeGen/flexible-array-init.c | 24 +- clang/test/CodeGen/global-init.c | 2 +- clang/test/CodeGen/init.c | 19 ++ .../linux-kernel-struct-union-initializer.c | 267 ------------------ .../linux-kernel-struct-union-initializer2.c | 140 --------- clang/test/CodeGen/mingw-long-double.c | 9 +- clang/test/CodeGen/mms-bitfields.c | 4 +- clang/test/CodeGen/union-init2.c | 4 +- clang/test/CodeGen/windows-swiftcall.c | 12 +- .../CodeGenObjC/designated-initializers.m | 2 +- 23 files changed, 96 insertions(+), 658 deletions(-) delete mode 100644 clang/test/CodeGen/linux-kernel-struct-union-initializer.c delete mode 100644 clang/test/CodeGen/linux-kernel-struct-union-initializer2.c diff --git a/clang/docs/LanguageExtensions.rst b/clang/docs/LanguageExtensions.rst index f4be97047422f..0c6b9b1b8f9ce 100644 --- a/clang/docs/LanguageExtensions.rst +++ b/clang/docs/LanguageExtensions.rst @@ -5860,26 +5860,3 @@ specify the starting offset to begin embedding from. The resources is treated as being empty if the specified offset is larger than the number of bytes in the resource. The offset will be applied *before* any ``limit`` parameters are applied. - -Union and aggregate initialization in C -======================================= - -In C23 (N2900), when an object is initialized from initializer ``= {}``, all -elements of arrays, all members of structs, and the first members of unions are -empty-initialized recursively. In addition, all padding bits are initialized to -zero. - -Clang guarantees the following behaviors: - -* ``1:`` Clang supports initializer ``= {}`` mentioned above in all C - standards. - -* ``2:`` When unions are initialized from initializer ``= {}``, bytes outside - of the first members of unions are also initialized to zero. - -* ``3:`` When unions, structures and arrays are initialized from initializer - ``= { initializer-list }``, all members not explicitly initialized in - the initializer list are empty-initialized recursively. In addition, all - padding bits are initialized to zero. - -Currently, the above extension only applies to C source code, not C++. diff --git a/clang/lib/CodeGen/CGExprAgg.cpp b/clang/lib/CodeGen/CGExprAgg.cpp index 43f3bcc95fe76..bbfc6672ecc25 100644 --- a/clang/lib/CodeGen/CGExprAgg.cpp +++ b/clang/lib/CodeGen/CGExprAgg.cpp @@ -1698,17 +1698,6 @@ void AggExprEmitter::VisitCXXParenListOrInitListExpr( // Prepare a 'this' for CXXDefaultInitExprs. 
CodeGenFunction::FieldConstructionScope FCS(CGF, Dest.getAddress()); - const bool ZeroInitPadding = - CGF.CGM.shouldZeroInitPadding() && !Dest.isZeroed(); - const Address BaseLoc = Dest.getAddress().withElementType(CGF.Int8Ty); - auto DoZeroInitPadding = [&](CharUnits Offset, CharUnits Size) { - if (Size.isPositive()) { - Address Loc = CGF.Builder.CreateConstGEP(BaseLoc, Offset.getQuantity()); - llvm::Constant *SizeVal = CGF.Builder.getInt64(Size.getQuantity()); - CGF.Builder.CreateMemSet(Loc, CGF.Builder.getInt8(0), SizeVal, false); - } - }; - if (record->isUnion()) { // Only initialize one field of a union. The field itself is // specified by the initializer list. @@ -1733,37 +1722,17 @@ void AggExprEmitter::VisitCXXParenListOrInitListExpr( if (NumInitElements) { // Store the initializer into the field EmitInitializationToLValue(InitExprs[0], FieldLoc); - if (ZeroInitPadding) { - CharUnits TotalSize = - Dest.getPreferredSize(CGF.getContext(), DestLV.getType()); - CharUnits FieldSize = - CGF.getContext().getTypeSizeInChars(FieldLoc.getType()); - DoZeroInitPadding(FieldSize, TotalSize - FieldSize); - } } else { // Default-initialize to null. - if (ZeroInitPadding) - EmitNullInitializationToLValue(DestLV); - else - EmitNullInitializationToLValue(FieldLoc); + EmitNullInitializationToLValue(FieldLoc); } + return; } // Here we iterate over the fields; this makes it simpler to both // default-initialize fields and skip over unnamed fields. - const ASTRecordLayout &Layout = CGF.getContext().getASTRecordLayout(record); - CharUnits SizeSoFar = CharUnits::Zero(); for (const auto *field : record->fields()) { - if (ZeroInitPadding) { - unsigned FieldNo = field->getFieldIndex(); - CharUnits Offset = - CGF.getContext().toCharUnitsFromBits(Layout.getFieldOffset(FieldNo)); - DoZeroInitPadding(SizeSoFar, Offset - SizeSoFar); - CharUnits FieldSize = - CGF.getContext().getTypeSizeInChars(field->getType()); - SizeSoFar = Offset + FieldSize; - } // We're done once we hit the flexible array member. 
if (field->getType()->isIncompleteArrayType()) break; @@ -1805,11 +1774,6 @@ void AggExprEmitter::VisitCXXParenListOrInitListExpr( } } } - if (ZeroInitPadding) { - CharUnits TotalSize = - Dest.getPreferredSize(CGF.getContext(), DestLV.getType()); - DoZeroInitPadding(SizeSoFar, TotalSize - SizeSoFar); - } } void AggExprEmitter::VisitArrayInitLoopExpr(const ArrayInitLoopExpr *E, diff --git a/clang/lib/CodeGen/CGExprConstant.cpp b/clang/lib/CodeGen/CGExprConstant.cpp index 66bc0640b632a..dd65080a84044 100644 --- a/clang/lib/CodeGen/CGExprConstant.cpp +++ b/clang/lib/CodeGen/CGExprConstant.cpp @@ -42,16 +42,6 @@ using namespace CodeGen; namespace { class ConstExprEmitter; -llvm::Constant *getPadding(const CodeGenModule &CGM, CharUnits PadSize) { - llvm::Type *Ty = CGM.CharTy; - if (PadSize > CharUnits::One()) - Ty = llvm::ArrayType::get(Ty, PadSize.getQuantity()); - if (CGM.shouldZeroInitPadding()) { - return llvm::Constant::getNullValue(Ty); - } - return llvm::UndefValue::get(Ty); -} - struct ConstantAggregateBuilderUtils { CodeGenModule &CGM; @@ -71,7 +61,10 @@ struct ConstantAggregateBuilderUtils { } llvm::Constant *getPadding(CharUnits PadSize) const { - return ::getPadding(CGM, PadSize); + llvm::Type *Ty = CGM.CharTy; + if (PadSize > CharUnits::One()) + Ty = llvm::ArrayType::get(Ty, PadSize.getQuantity()); + return llvm::UndefValue::get(Ty); } llvm::Constant *getZeroes(CharUnits ZeroSize) const { @@ -598,11 +591,6 @@ class ConstStructBuilder { bool Build(const InitListExpr *ILE, bool AllowOverwrite); bool Build(const APValue &Val, const RecordDecl *RD, bool IsPrimaryBase, const CXXRecordDecl *VTableClass, CharUnits BaseOffset); - bool DoZeroInitPadding(const ASTRecordLayout &Layout, unsigned FieldNo, - const FieldDecl &Field, bool AllowOverwrite, - CharUnits &FieldSize, CharUnits &SizeSoFar); - bool DoZeroInitPadding(const ASTRecordLayout &Layout, bool AllowOverwrite, - CharUnits &SizeSoFar); llvm::Constant *Finalize(QualType Ty); }; @@ -727,10 +715,6 @@ bool ConstStructBuilder::Build(const InitListExpr *ILE, bool AllowOverwrite) { if (CXXRD->getNumBases()) return false; - const bool ZeroInitPadding = CGM.shouldZeroInitPadding(); - CharUnits FieldSize = CharUnits::Zero(); - CharUnits SizeSoFar = CharUnits::Zero(); - for (FieldDecl *Field : RD->fields()) { ++FieldNo; @@ -748,13 +732,8 @@ bool ConstStructBuilder::Build(const InitListExpr *ILE, bool AllowOverwrite) { const Expr *Init = nullptr; if (ElementNo < ILE->getNumInits()) Init = ILE->getInit(ElementNo++); - if (isa_and_nonnull(Init)) { - if (ZeroInitPadding && - !DoZeroInitPadding(Layout, FieldNo, *Field, AllowOverwrite, FieldSize, - SizeSoFar)) - return false; + if (isa_and_nonnull(Init)) continue; - } // Zero-sized fields are not emitted, but their initializers may still // prevent emission of this struct as a constant. @@ -764,11 +743,6 @@ bool ConstStructBuilder::Build(const InitListExpr *ILE, bool AllowOverwrite) { continue; } - if (ZeroInitPadding && - !DoZeroInitPadding(Layout, FieldNo, *Field, AllowOverwrite, FieldSize, - SizeSoFar)) - return false; - // When emitting a DesignatedInitUpdateExpr, a nested InitListExpr // represents additional overwriting of our current constant value, and not // a new constant to emit independently. 
@@ -794,10 +768,6 @@ bool ConstStructBuilder::Build(const InitListExpr *ILE, bool AllowOverwrite) { if (!EltInit) return false; - if (ZeroInitPadding && FieldSize.isZero()) - SizeSoFar += CharUnits::fromQuantity( - CGM.getDataLayout().getTypeAllocSize(EltInit->getType())); - if (!Field->isBitField()) { // Handle non-bitfield members. if (!AppendField(Field, Layout.getFieldOffset(FieldNo), EltInit, @@ -815,9 +785,6 @@ bool ConstStructBuilder::Build(const InitListExpr *ILE, bool AllowOverwrite) { } } - if (ZeroInitPadding && !DoZeroInitPadding(Layout, AllowOverwrite, SizeSoFar)) - return false; - return true; } @@ -882,9 +849,6 @@ bool ConstStructBuilder::Build(const APValue &Val, const RecordDecl *RD, unsigned FieldNo = 0; uint64_t OffsetBits = CGM.getContext().toBits(Offset); - const bool ZeroInitPadding = CGM.shouldZeroInitPadding(); - CharUnits FieldSize = CharUnits::Zero(); - CharUnits SizeSoFar = CharUnits::Zero(); bool AllowOverwrite = false; for (RecordDecl::field_iterator Field = RD->field_begin(), @@ -906,15 +870,6 @@ bool ConstStructBuilder::Build(const APValue &Val, const RecordDecl *RD, if (!EltInit) return false; - if (ZeroInitPadding) { - if (!DoZeroInitPadding(Layout, FieldNo, **Field, AllowOverwrite, - FieldSize, SizeSoFar)) - return false; - if (FieldSize.isZero()) - SizeSoFar += CharUnits::fromQuantity( - CGM.getDataLayout().getTypeAllocSize(EltInit->getType())); - } - if (!Field->isBitField()) { // Handle non-bitfield members. if (!AppendField(*Field, Layout.getFieldOffset(FieldNo) + OffsetBits, @@ -931,35 +886,7 @@ bool ConstStructBuilder::Build(const APValue &Val, const RecordDecl *RD, return false; } } - if (ZeroInitPadding && !DoZeroInitPadding(Layout, AllowOverwrite, SizeSoFar)) - return false; - - return true; -} -bool ConstStructBuilder::DoZeroInitPadding( - const ASTRecordLayout &Layout, unsigned FieldNo, const FieldDecl &Field, - bool AllowOverwrite, CharUnits &FieldSize, CharUnits &SizeSoFar) { - CharUnits Offset = - CGM.getContext().toCharUnitsFromBits(Layout.getFieldOffset(FieldNo)); - if (SizeSoFar < Offset) - if (!AppendBytes(SizeSoFar, getPadding(CGM, Offset - SizeSoFar), - AllowOverwrite)) - return false; - FieldSize = CGM.getContext().getTypeSizeInChars(Field.getType()); - SizeSoFar = Offset + FieldSize; - return true; -} - -bool ConstStructBuilder::DoZeroInitPadding(const ASTRecordLayout &Layout, - bool AllowOverwrite, - CharUnits &SizeSoFar) { - CharUnits TotalSize = Layout.getSize(); - if (SizeSoFar < TotalSize) - if (!AppendBytes(SizeSoFar, getPadding(CGM, TotalSize - SizeSoFar), - AllowOverwrite)) - return false; - SizeSoFar = TotalSize; return true; } @@ -1200,10 +1127,12 @@ class ConstExprEmitter assert(CurSize <= TotalSize && "Union size mismatch!"); if (unsigned NumPadBytes = TotalSize - CurSize) { - llvm::Constant *Padding = - getPadding(CGM, CharUnits::fromQuantity(NumPadBytes)); - Elts.push_back(Padding); - Types.push_back(Padding->getType()); + llvm::Type *Ty = CGM.CharTy; + if (NumPadBytes > 1) + Ty = llvm::ArrayType::get(Ty, NumPadBytes); + + Elts.push_back(llvm::UndefValue::get(Ty)); + Types.push_back(Ty); } llvm::StructType *STy = llvm::StructType::get(VMContext, Types, false); diff --git a/clang/lib/CodeGen/CodeGenModule.h b/clang/lib/CodeGen/CodeGenModule.h index fcdfef03088da..c58bb88035ca8 100644 --- a/clang/lib/CodeGen/CodeGenModule.h +++ b/clang/lib/CodeGen/CodeGenModule.h @@ -1676,57 +1676,6 @@ class CodeGenModule : public CodeGenTypeCache { MustTailCallUndefinedGlobals.insert(Global); } - bool shouldZeroInitPadding() const { - // 
In C23 (N3096) $6.7.10: - // """ - // If any object is initialized with an empty iniitializer, then it is - // subject to default initialization: - // - if it is an aggregate, every member is initialized (recursively) - // according to these rules, and any padding is initialized to zero bits; - // - if it is a union, the first named member is initialized (recursively) - // according to these rules, and any padding is initialized to zero bits. - // - // If the aggregate or union contains elements or members that are - // aggregates or unions, these rules apply recursively to the subaggregates - // or contained unions. - // - // If there are fewer initializers in a brace-enclosed list than there are - // elements or members of an aggregate, or fewer characters in a string - // literal used to initialize an array of known size than there are elements - // in the array, the remainder of the aggregate is subject to default - // initialization. - // """ - // - // From my understanding, the standard is ambiguous in the following two - // areas: - // 1. For a union type with empty initializer, if the first named member is - // not the largest member, then the bytes comes after the first named member - // but before padding are left unspecified. An example is: - // union U { int a; long long b;}; - // union U u = {}; // The first 4 bytes are 0, but 4-8 bytes are left - // unspecified. - // - // 2. It only mentions padding for empty initializer, but doesn't mention - // padding for a non empty initialization list. And if the aggregation or - // union contains elements or members that are aggregates or unions, and - // some are non empty initializers, while others are empty initiailizers, - // the padding initialization is unclear. An example is: - // struct S1 { int a; long long b; }; - // struct S2 { char c; struct S1 s1; }; - // // The values for paddings between s2.c and s2.s1.a, between s2.s1.a - // and s2.s1.b are unclear. - // struct S2 s2 = { 'c' }; - // - // Here we choose to zero initiailize left bytes of a union type. Because - // projects like the Linux kernel are relying on this behavior. If we don't - // explicitly zero initialize them, the undef values can be optimized to - // return gabage data. We also choose to zero initialize paddings for - // aggregates and unions, no matter they are initialized by empty - // initializers or non empty initializers. This can provide a consistent - // behavior. So projects like the Linux kernel can rely on it. 
- return !getLangOpts().CPlusPlus; - } - private: bool shouldDropDLLAttribute(const Decl *D, const llvm::GlobalValue *GV) const; diff --git a/clang/test/CodeGen/2008-07-22-bitfield-init-after-zero-len-array.c b/clang/test/CodeGen/2008-07-22-bitfield-init-after-zero-len-array.c index b639734ef5d4b..b72d689659e60 100644 --- a/clang/test/CodeGen/2008-07-22-bitfield-init-after-zero-len-array.c +++ b/clang/test/CodeGen/2008-07-22-bitfield-init-after-zero-len-array.c @@ -8,4 +8,4 @@ struct et7 { 52, }; -// CHECK: @yv7 ={{.*}} global { [0 x float], i8, [3 x i8] } { [0 x float] zeroinitializer, i8 52, [3 x i8] zeroinitializer } +// CHECK: @yv7 ={{.*}} global %struct.et7 { [0 x float] zeroinitializer, i8 52 } diff --git a/clang/test/CodeGen/2008-08-07-AlignPadding1.c b/clang/test/CodeGen/2008-08-07-AlignPadding1.c index d69cbc22cc1df..17e88ce02659f 100644 --- a/clang/test/CodeGen/2008-08-07-AlignPadding1.c +++ b/clang/test/CodeGen/2008-08-07-AlignPadding1.c @@ -20,9 +20,9 @@ struct gc_generation { #define GEN_HEAD(n) (&generations[n].head) -// The idea is that there are 6 zeroinitializers in this structure initializer to cover +// The idea is that there are 6 undefs in this structure initializer to cover // the padding between elements. -// CHECK: @generations ={{.*}} global [3 x %struct.gc_generation] [%struct.gc_generation { %union._gc_head { %struct.anon { ptr @generations, ptr @generations, i64 0 }, [8 x i8] zeroinitializer }, i32 700, i32 0, [8 x i8] zeroinitializer }, %struct.gc_generation { %union._gc_head { %struct.anon { ptr getelementptr (i8, ptr @generations, i64 48), ptr getelementptr (i8, ptr @generations, i64 48), i64 0 }, [8 x i8] zeroinitializer }, i32 10, i32 0, [8 x i8] zeroinitializer }, %struct.gc_generation { %union._gc_head { %struct.anon { ptr getelementptr (i8, ptr @generations, i64 96), ptr getelementptr (i8, ptr @generations, i64 96), i64 0 }, [8 x i8] zeroinitializer }, i32 10, i32 0, [8 x i8] zeroinitializer }] +// CHECK: @generations ={{.*}} global [3 x %struct.gc_generation] [%struct.gc_generation { %union._gc_head { %struct.anon { ptr @generations, ptr @generations, i64 0 }, [8 x i8] undef }, i32 700, i32 0, [8 x i8] undef }, %struct.gc_generation { %union._gc_head { %struct.anon { ptr getelementptr (i8, ptr @generations, i64 48), ptr getelementptr (i8, ptr @generations, i64 48), i64 0 }, [8 x i8] undef }, i32 10, i32 0, [8 x i8] undef }, %struct.gc_generation { %union._gc_head { %struct.anon { ptr getelementptr (i8, ptr @generations, i64 96), ptr getelementptr (i8, ptr @generations, i64 96), i64 0 }, [8 x i8] undef }, i32 10, i32 0, [8 x i8] undef }] /* linked lists of container objects */ struct gc_generation generations[3] = { /* PyGC_Head, threshold, count */ diff --git a/clang/test/CodeGen/2009-06-14-anonymous-union-init.c b/clang/test/CodeGen/2009-06-14-anonymous-union-init.c index a4375d7868f01..13f6357f7966d 100644 --- a/clang/test/CodeGen/2009-06-14-anonymous-union-init.c +++ b/clang/test/CodeGen/2009-06-14-anonymous-union-init.c @@ -7,7 +7,7 @@ struct sysfs_dirent { }; struct sysfs_dirent sysfs_root = { {}, 16877 }; -// CHECK: @sysfs_root = {{.*}}global { %union.anon, i16, [2 x i8] } { %union.anon zeroinitializer, i16 16877, [2 x i8] zeroinitializer } +// CHECK: @sysfs_root = {{.*}}global %struct.sysfs_dirent { %union.anon zeroinitializer, i16 16877 } struct Foo { union { struct empty {} x; }; @@ -16,4 +16,4 @@ struct Foo { struct Foo foo = { {}, 16877 }; // EMPTY: @foo = {{.*}}global %struct.Foo { i16 16877 } -// EMPTY-MSVC: @foo = {{.*}}global %struct.Foo 
{ [4 x i8] zeroinitializer, i16 16877 } +// EMPTY-MSVC: @foo = {{.*}}global %struct.Foo { [4 x i8] undef, i16 16877 } diff --git a/clang/test/CodeGen/64bit-swiftcall.c b/clang/test/CodeGen/64bit-swiftcall.c index 7f8aa02d97ce1..7af65ccf556a8 100644 --- a/clang/test/CodeGen/64bit-swiftcall.c +++ b/clang/test/CodeGen/64bit-swiftcall.c @@ -14,6 +14,8 @@ // CHECK-DAG: %struct.atomic_padded = type { { %struct.packed, [7 x i8] } } // CHECK-DAG: %struct.packed = type <{ i64, i8 }> +// +// CHECK: [[STRUCT2_RESULT:@.*]] = private {{.*}} constant [[STRUCT2_TYPE:%.*]] { i32 0, i8 0, i8 undef, i8 0, i32 0, i32 0 } /*****************************************************************************/ /****************************** PARAMETER ABIS *******************************/ @@ -160,8 +162,8 @@ typedef struct { } struct_2; TEST(struct_2); // CHECK-LABEL: define{{.*}} swiftcc { i64, i64 } @return_struct_2() {{.*}}{ -// CHECK: [[RET:%.*]] = alloca [[STRUCT2:%.*]], align 4 -// CHECK: call void @llvm.memset +// CHECK: [[RET:%.*]] = alloca [[STRUCT2_TYPE]], align 4 +// CHECK: call void @llvm.memcpy{{.*}}({{.*}}[[RET]], {{.*}}[[STRUCT2_RESULT]] // CHECK: [[GEP0:%.*]] = getelementptr inbounds nuw { i64, i64 }, ptr [[RET]], i32 0, i32 0 // CHECK: [[T0:%.*]] = load i64, ptr [[GEP0]], align 4 // CHECK: [[GEP1:%.*]] = getelementptr inbounds nuw { i64, i64 }, ptr [[RET]], i32 0, i32 1 @@ -171,7 +173,7 @@ TEST(struct_2); // CHECK: ret { i64, i64 } [[R1]] // CHECK: } // CHECK-LABEL: define{{.*}} swiftcc void @take_struct_2(i64 %0, i64 %1) {{.*}}{ -// CHECK: [[V:%.*]] = alloca [[STRUCT2]], align 4 +// CHECK: [[V:%.*]] = alloca [[STRUCT:%.*]], align 4 // CHECK: [[GEP0:%.*]] = getelementptr inbounds nuw { i64, i64 }, ptr [[V]], i32 0, i32 0 // CHECK: store i64 %0, ptr [[GEP0]], align 4 // CHECK: [[GEP1:%.*]] = getelementptr inbounds nuw { i64, i64 }, ptr [[V]], i32 0, i32 1 @@ -179,7 +181,7 @@ TEST(struct_2); // CHECK: ret void // CHECK: } // CHECK-LABEL: define{{.*}} void @test_struct_2() {{.*}} { -// CHECK: [[TMP:%.*]] = alloca [[STRUCT2]], align 4 +// CHECK: [[TMP:%.*]] = alloca [[STRUCT2_TYPE]], align 4 // CHECK: [[CALL:%.*]] = call swiftcc { i64, i64 } @return_struct_2() // CHECK: [[GEP:%.*]] = getelementptr inbounds nuw {{.*}} [[TMP]], i32 0, i32 0 // CHECK: [[T0:%.*]] = extractvalue { i64, i64 } [[CALL]], 0 @@ -252,7 +254,7 @@ typedef union { TEST(union_het_fp) // CHECK-LABEL: define{{.*}} swiftcc i64 @return_union_het_fp() // CHECK: [[RET:%.*]] = alloca [[UNION:%.*]], align 8 -// CHECK: call void @llvm.memset{{.*}}(ptr align 8 [[RET]] +// CHECK: call void @llvm.memcpy{{.*}}(ptr align 8 [[RET]] // CHECK: [[GEP:%.*]] = getelementptr inbounds nuw { i64 }, ptr [[RET]], i32 0, i32 0 // CHECK: [[R0:%.*]] = load i64, ptr [[GEP]], align 8 // CHECK: ret i64 [[R0]] diff --git a/clang/test/CodeGen/arm-swiftcall.c b/clang/test/CodeGen/arm-swiftcall.c index 677b878c6765d..ec0e3867909a8 100644 --- a/clang/test/CodeGen/arm-swiftcall.c +++ b/clang/test/CodeGen/arm-swiftcall.c @@ -172,7 +172,7 @@ typedef struct { TEST(struct_2); // CHECK-LABEL: define{{.*}} @return_struct_2() // CHECK: [[RET:%.*]] = alloca [[REC:%.*]], align 4 -// CHECK: @llvm.memset +// CHECK: @llvm.memcpy // CHECK: [[T0:%.*]] = getelementptr inbounds nuw [[AGG:{ i32, i32, float, float }]], ptr [[RET]], i32 0, i32 0 // CHECK: [[FIRST:%.*]] = load i32, ptr [[T0]], align 4 // CHECK: [[T0:%.*]] = getelementptr inbounds nuw [[AGG]], ptr [[RET]], i32 0, i32 1 @@ -274,7 +274,7 @@ typedef union { TEST(union_het_fp) // CHECK-LABEL: define{{.*}} @return_union_het_fp() // 
CHECK: [[RET:%.*]] = alloca [[REC:%.*]], align {{(4|8)}} -// CHECK: @llvm.memset +// CHECK: @llvm.memcpy // CHECK: [[T0:%.*]] = getelementptr inbounds nuw [[AGG:{ i32, i32 }]], ptr [[RET]], i32 0, i32 0 // CHECK: [[FIRST:%.*]] = load i32, ptr [[T0]], align {{(4|8)}} // CHECK: [[T0:%.*]] = getelementptr inbounds nuw [[AGG]], ptr [[RET]], i32 0, i32 1 diff --git a/clang/test/CodeGen/const-init.c b/clang/test/CodeGen/const-init.c index fc973cb983a80..ad3e9551199ac 100644 --- a/clang/test/CodeGen/const-init.c +++ b/clang/test/CodeGen/const-init.c @@ -170,7 +170,7 @@ void g30(void) { int : 1; int x; } a = {}; - // CHECK: @g30.a = internal global %struct.anon.1 zeroinitializer, align 1 + // CHECK: @g30.a = internal global %struct.anon.1 <{ i8 undef, i32 0 }>, align 1 #pragma pack() } @@ -182,7 +182,7 @@ void g31(void) { short z; } a = {23122, -12312731, -312}; #pragma pack() - // CHECK: @g31.a = internal global { i16, [2 x i8], i32, i16, [2 x i8] } { i16 23122, [2 x i8] zeroinitializer, i32 -12312731, i16 -312, [2 x i8] zeroinitializer }, align 4 + // CHECK: @g31.a = internal global %struct.anon.2 { i16 23122, i32 -12312731, i16 -312 }, align 4 } // Clang should evaluate this in constant context, so floating point mode should diff --git a/clang/test/CodeGen/decl.c b/clang/test/CodeGen/decl.c index 97446781fdbd2..a63846b3223da 100644 --- a/clang/test/CodeGen/decl.c +++ b/clang/test/CodeGen/decl.c @@ -2,10 +2,10 @@ // CHECK: @test1.x = internal constant [12 x i32] [i32 1 // CHECK: @__const.test2.x = private unnamed_addr constant [13 x i32] [i32 1, -// CHECK: @test5w = {{(dso_local )?}}global { i32, [4 x i8] } { i32 2, [4 x i8] zeroinitializer } +// CHECK: @test5w = {{(dso_local )?}}global { i32, [4 x i8] } { i32 2, [4 x i8] undef } // CHECK: @test5y = {{(dso_local )?}}global { double } { double 7.300000e+0{{[0]*}}1 } -// CHECK: @__const.test6.x = private unnamed_addr constant { i8, i8, [2 x i8], i32, i32 } { i8 1, i8 2, [2 x i8] zeroinitializer, i32 3, i32 0 } +// CHECK: @__const.test6.x = private unnamed_addr constant %struct.SelectDest { i8 1, i8 2, i32 3, i32 0 } // CHECK: @test7 = {{(dso_local )?}}global [2 x %struct.test7s] [%struct.test7s { i32 1, i32 2 }, %struct.test7s { i32 4, i32 0 }] diff --git a/clang/test/CodeGen/designated-initializers.c b/clang/test/CodeGen/designated-initializers.c index ac7860db43be7..620b1b90d2575 100644 --- a/clang/test/CodeGen/designated-initializers.c +++ b/clang/test/CodeGen/designated-initializers.c @@ -8,7 +8,7 @@ struct foo { // CHECK: @u ={{.*}} global %union.anon zeroinitializer union { int i; float f; } u = { }; -// CHECK: @u2 ={{.*}} global { i32, [4 x i8] } zeroinitializer +// CHECK: @u2 ={{.*}} global { i32, [4 x i8] } { i32 0, [4 x i8] undef } union { int i; double f; } u2 = { }; // CHECK: @u3 ={{.*}} global %union.anon.1 zeroinitializer @@ -62,22 +62,22 @@ struct overwrite_string_struct2 { char L[6]; int M; } overwrite_string2[] = { { { "foo" }, 1 }, [0].L[2] = 'x'}; -// CHECK: [6 x i8] c"fox\00\00\00", [2 x i8] zeroinitializer, i32 1 +// CHECK: [6 x i8] c"fox\00\00\00", i32 1 struct overwrite_string_struct3 { char L[3]; int M; } overwrite_string3[] = { { { "foo" }, 1 }, [0].L[2] = 'x'}; -// CHECK: [3 x i8] c"fox", i8 0, i32 1 +// CHECK: [3 x i8] c"fox", i32 1 struct overwrite_string_struct4 { char L[3]; int M; } overwrite_string4[] = { { { "foobar" }, 1 }, [0].L[2] = 'x'}; -// CHECK: [3 x i8] c"fox", i8 0, i32 1 +// CHECK: [3 x i8] c"fox", i32 1 struct overwrite_string_struct5 { char L[6]; int M; } overwrite_string5[] = { { { "foo" }, 1 }, 
[0].L[4] = 'y'}; -// CHECK: [6 x i8] c"foo\00y\00", [2 x i8] zeroinitializer, i32 1 +// CHECK: [6 x i8] c"foo\00y\00", i32 1 // CHECK: @u1 = {{.*}} { i32 65535 } @@ -138,7 +138,7 @@ union_16644_t union_16644_instance_4[2] = [1].b[1] = 4 }; -// CHECK: @lab ={{.*}} global { [4 x i8], i32 } { [4 x i8] zeroinitializer, i32 123 } +// CHECK: @lab ={{.*}} global { [4 x i8], i32 } { [4 x i8] undef, i32 123 } struct leading_anon_bitfield { int : 32; int n; } lab = { .n = 123 }; struct Base { diff --git a/clang/test/CodeGen/ext-int.c b/clang/test/CodeGen/ext-int.c index aebacd6f22ffc..e3d609a4ba4a2 100644 --- a/clang/test/CodeGen/ext-int.c +++ b/clang/test/CodeGen/ext-int.c @@ -16,7 +16,7 @@ unsigned _BitInt(1) GlobSize1 = 0; // CHECK: @GlobSize1 = {{.*}}global i8 0 -// CHECK64: @__const.foo.A = private unnamed_addr constant { i32, [4 x i8], <{ i8, [23 x i8] }> } { i32 1, [4 x i8] zeroinitializer, <{ i8, [23 x i8] }> <{ i8 -86, [23 x i8] zeroinitializer }> }, align 8 +// CHECK64: @__const.foo.A = private unnamed_addr constant { i32, [4 x i8], <{ i8, [23 x i8] }> } { i32 1, [4 x i8] undef, <{ i8, [23 x i8] }> <{ i8 -86, [23 x i8] zeroinitializer }> }, align 8 // @BigGlob = global [40 x i8] c"\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF", align 8 // CHECK64: @f.p = internal global <{ i8, i8, [22 x i8] }> <{ i8 16, i8 39, [22 x i8] zeroinitializer }>, align 8 @@ -91,8 +91,8 @@ int foo(int a) { // CHECK64: %B2 = getelementptr inbounds nuw %struct.S1, ptr %B, i32 0, i32 2 // WIN32: %B2 = getelementptr inbounds nuw %struct.S1, ptr %B, i32 0, i32 2 // LIN32: %B2 = getelementptr inbounds nuw %struct.S1, ptr %B, i32 0, i32 1 - // CHECK: %[[V1:.+]] = load i32, ptr %a.addr, align 4 - // CHECK: %conv = sext i32 %[[V1]] to i129 + // CHECK: %0 = load i32, ptr %a.addr, align 4 + // CHECK: %conv = sext i32 %0 to i129 // CHECK64: storedv = sext i129 %conv to i192 // WIN32: storedv = sext i129 %conv to i192 // LIN32: storedv = sext i129 %conv to i160 @@ -102,12 +102,12 @@ int foo(int a) { // CHECK64: %B3 = getelementptr inbounds nuw %struct.S1, ptr %A, i32 0, i32 2 // WIN32: %B3 = getelementptr inbounds nuw %struct.S1, ptr %A, i32 0, i32 2 // LIN32: %B3 = getelementptr inbounds nuw %struct.S1, ptr %A, i32 0, i32 1 - // CHECK64: %[[V2:.+]] = load i192, ptr %B3, align 8 - // WIN32: %[[V2:.+]] = load i192, ptr %B3, align 8 - // LIN32: %[[V2:.+]] = load i160, ptr %B3, align 4 - // CHECK64: %loadedv = trunc i192 %[[V2]] to i129 - // WIN32: %loadedv = trunc i192 %[[V2]] to i129 - // LIN32: %loadedv = trunc i160 %[[V2]] to i129 + // CHECK64: %1 = load i192, ptr %B3, align 8 + // WIN32: %1 = load i192, ptr %B3, align 8 + // LIN32: %1 = load i160, ptr %B3, align 4 + // CHECK64: %loadedv = trunc i192 %1 to i129 + // WIN32: %loadedv = trunc i192 %1 to i129 + // LIN32: %loadedv = trunc i160 %1 to i129 // CHECK: %conv4 = trunc i129 %loadedv to i32 struct S1 A = {1, 170}; struct S1 B = {1, a}; diff --git a/clang/test/CodeGen/flexible-array-init.c b/clang/test/CodeGen/flexible-array-init.c index 17b520fe83094..15a30c15ac966 100644 --- a/clang/test/CodeGen/flexible-array-init.c +++ b/clang/test/CodeGen/flexible-array-init.c @@ -14,11 +14,11 @@ struct { int y[]; } b1 = { { 14, 16 } }; // sizeof(c) == 8, so this global should be at least 8 bytes. 
struct { int x; char c; char y[]; } c = { 1, 2, { 13, 15 } }; -// CHECK: @c ={{.*}} global { i32, i8, [2 x i8], i8 } { i32 1, i8 2, [2 x i8] c"\0D\0F", i8 0 } +// CHECK: @c ={{.*}} global { i32, i8, [2 x i8] } { i32 1, i8 2, [2 x i8] c"\0D\0F" } // sizeof(d) == 8, so this global should be at least 8 bytes. struct __attribute((packed, aligned(4))) { char a; int x; char z[]; } d = { 1, 2, { 13, 15 } }; -// CHECK: @d ={{.*}} <{ i8, i32, [2 x i8], i8 }> <{ i8 1, i32 2, [2 x i8] c"\0D\0F", i8 0 }>, +// CHECK: @d ={{.*}} <{ i8, i32, [2 x i8], i8 }> <{ i8 1, i32 2, [2 x i8] c"\0D\0F", i8 undef }>, // This global needs 9 bytes to hold all the flexible array members. struct __attribute((packed, aligned(4))) { char a; int x; char z[]; } e = { 1, 2, { 13, 15, 17, 19 } }; @@ -55,21 +55,21 @@ struct { int a; union { int b; short x[]; }; int c; int d; } hf = {1, 2, {}, 3}; // First member is the potential flexible array, initialization requires braces. struct { int a; union { short x; int b; }; int c; int d; } i = {1, 2, {}, 3}; -// CHECK: @i = global { i32, { i16, [2 x i8] }, i32, i32 } { i32 1, { i16, [2 x i8] } { i16 2, [2 x i8] zeroinitializer }, i32 0, i32 3 } +// CHECK: @i = global { i32, { i16, [2 x i8] }, i32, i32 } { i32 1, { i16, [2 x i8] } { i16 2, [2 x i8] undef }, i32 0, i32 3 } struct { int a; union { short x[0]; int b; }; int c; int d; } i0 = {1, {}, 2, 3}; -// CHECK: @i0 = global { i32, { [0 x i16], [4 x i8] }, i32, i32 } { i32 1, { [0 x i16], [4 x i8] } zeroinitializer, i32 2, i32 3 } +// CHECK: @i0 = global { i32, { [0 x i16], [4 x i8] }, i32, i32 } { i32 1, { [0 x i16], [4 x i8] } { [0 x i16] zeroinitializer, [4 x i8] undef }, i32 2, i32 3 } struct { int a; union { short x[1]; int b; }; int c; int d; } i1 = {1, {2}, {}, 3}; -// CHECK: @i1 = global { i32, { [1 x i16], [2 x i8] }, i32, i32 } { i32 1, { [1 x i16], [2 x i8] } { [1 x i16] [i16 2], [2 x i8] zeroinitializer }, i32 0, i32 3 } +// CHECK: @i1 = global { i32, { [1 x i16], [2 x i8] }, i32, i32 } { i32 1, { [1 x i16], [2 x i8] } { [1 x i16] [i16 2], [2 x i8] undef }, i32 0, i32 3 } struct { int a; union { short x[]; int b; }; int c; int d; } i_f = {4, {}, {}, 6}; -// CHECK: @i_f = global { i32, { [0 x i16], [4 x i8] }, i32, i32 } { i32 4, { [0 x i16], [4 x i8] } zeroinitializer, i32 0, i32 6 } +// CHECK: @i_f = global { i32, { [0 x i16], [4 x i8] }, i32, i32 } { i32 4, { [0 x i16], [4 x i8] } { [0 x i16] zeroinitializer, [4 x i8] undef }, i32 0, i32 6 } // Named initializers; order doesn't matter. 
struct { int a; union { int b; short x; }; int c; int d; } hn = {.a = 1, .x = 2, .c = 3}; -// CHECK: @hn = global { i32, { i16, [2 x i8] }, i32, i32 } { i32 1, { i16, [2 x i8] } { i16 2, [2 x i8] zeroinitializer }, i32 3, i32 0 } +// CHECK: @hn = global { i32, { i16, [2 x i8] }, i32, i32 } { i32 1, { i16, [2 x i8] } { i16 2, [2 x i8] undef }, i32 3, i32 0 } struct { int a; union { int b; short x[0]; }; int c; int d; } hn0 = {.a = 1, .x = {2}, .c = 3}; -// CHECK: @hn0 = global { i32, { [0 x i16], [4 x i8] }, i32, i32 } { i32 1, { [0 x i16], [4 x i8] } zeroinitializer, i32 3, i32 0 } +// CHECK: @hn0 = global { i32, { [0 x i16], [4 x i8] }, i32, i32 } { i32 1, { [0 x i16], [4 x i8] } { [0 x i16] zeroinitializer, [4 x i8] undef }, i32 3, i32 0 } struct { int a; union { int b; short x[1]; }; int c; int d; } hn1 = {.a = 1, .x = {2}, .c = 3}; -// CHECK: @hn1 = global { i32, { [1 x i16], [2 x i8] }, i32, i32 } { i32 1, { [1 x i16], [2 x i8] } { [1 x i16] [i16 2], [2 x i8] zeroinitializer }, i32 3, i32 0 } +// CHECK: @hn1 = global { i32, { [1 x i16], [2 x i8] }, i32, i32 } { i32 1, { [1 x i16], [2 x i8] } { [1 x i16] [i16 2], [2 x i8] undef }, i32 3, i32 0 } struct { char a[]; } empty_struct = {}; // CHECK: @empty_struct ={{.*}} global %struct.anon{{.*}} zeroinitializer, align 1 @@ -96,10 +96,10 @@ union { char a[]; } only_in_union0 = {0}; // CHECK: @only_in_union0 = global { [1 x i8] } zeroinitializer, align 1 union { char a[]; int b; } first_in_union = {}; -// CHECK: @first_in_union = global { [0 x i8], [4 x i8] } zeroinitializer, align 4 +// CHECK: @first_in_union = global { [0 x i8], [4 x i8] } { [0 x i8] zeroinitializer, [4 x i8] undef }, align 4 union { char a[]; int b; } first_in_union0 = {0}; -// CHECK: @first_in_union0 = global { [1 x i8], [3 x i8] } zeroinitializer, align 4 +// CHECK: @first_in_union0 = global { [1 x i8], [3 x i8] } { [1 x i8] zeroinitializer, [3 x i8] undef }, align 4 union { char a[]; int b; } first_in_union123 = { {1, 2, 3} }; -// CHECK: @first_in_union123 = global { [3 x i8], i8 } { [3 x i8] c"\01\02\03", i8 0 }, align 4 +// CHECK: @first_in_union123 = global { [3 x i8], i8 } { [3 x i8] c"\01\02\03", i8 undef }, align 4 diff --git a/clang/test/CodeGen/global-init.c b/clang/test/CodeGen/global-init.c index b156466dbaaff..7f1d675b97c09 100644 --- a/clang/test/CodeGen/global-init.c +++ b/clang/test/CodeGen/global-init.c @@ -33,7 +33,7 @@ struct ManyFields { int f; }; -// CHECK: global { i32, i32, i32, i8, [3 x i8], i32, i32 } { i32 1, i32 2, i32 0, i8 0, [3 x i8] zeroinitializer, i32 0, i32 0 } +// CHECK: global %struct.ManyFields { i32 1, i32 2, i32 0, i8 0, i32 0, i32 0 } struct ManyFields FewInits = {1, 2}; diff --git a/clang/test/CodeGen/init.c b/clang/test/CodeGen/init.c index 27f427dff8f79..cbf615bb9ddfe 100644 --- a/clang/test/CodeGen/init.c +++ b/clang/test/CodeGen/init.c @@ -187,6 +187,25 @@ void nonzeroMemsetf64(void) { // CHECK: call void @llvm.memset.p0.i32(ptr {{.*}}, i8 68, i32 56, i1 false) } +void nonzeroPaddedUnionMemset(void) { + union U { char c; int i; }; + union U arr[9] = { 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, }; + // CHECK-LABEL: @nonzeroPaddedUnionMemset( + // CHECK-NOT: store + // CHECK-NOT: memcpy + // CHECK: call void @llvm.memset.p0.i32(ptr {{.*}}, i8 -16, i32 36, i1 false) +} + +void nonzeroNestedMemset(void) { + union U { char c; int i; }; + struct S { union U u; short i; }; + struct S arr[5] = { { {0xF0}, 0xF0F0 }, { {0xF0}, 0xF0F0 }, { {0xF0}, 0xF0F0 }, { {0xF0}, 0xF0F0 }, { {0xF0}, 0xF0F0 }, }; + // CHECK-LABEL: 
@nonzeroNestedMemset( + // CHECK-NOT: store + // CHECK-NOT: memcpy + // CHECK: call void @llvm.memset.p0.i32(ptr {{.*}}, i8 -16, i32 40, i1 false) +} + // PR9257 struct test11S { int A[10]; diff --git a/clang/test/CodeGen/linux-kernel-struct-union-initializer.c b/clang/test/CodeGen/linux-kernel-struct-union-initializer.c deleted file mode 100644 index dc68cc0f454c8..0000000000000 --- a/clang/test/CodeGen/linux-kernel-struct-union-initializer.c +++ /dev/null @@ -1,267 +0,0 @@ -// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --check-globals all --version 5 -// RUN: %clang_cc1 -triple x86_64-unknown-unknown -std=gnu11 -verify -emit-llvm %s -o - | FileCheck %s -// expected-no-diagnostics - -union U1 { - int x; - char y[16]; -}; - -struct S1 { - int x; - union U1 y; -}; - -union U2 { - int x; - char y[16]; -} __attribute__((__aligned__(32))); - -struct S2 { - int x; - long long y; - char z[8]; -} __attribute__((__aligned__(32))); - -union U1 global_u1 = {}; - -union U1 global_u2 = {3}; - -union U1 global_u2_from_cast = (union U1)3; - -struct S1 global_s1 = {}; - -struct S1 global_s2 = { - .x = 3, -}; - -struct S1 global_s3 = {.x = 3, .y = {.x = 6}}; - -const union U1 global_const_u1 = {4}; -struct S1 global_s3_from_const_u1 = {.y = global_const_u1}; - -union U2 global_u3 = {}; - -struct S2 global_s4 = {}; - -struct S2 global_s5 = {.x = 1}; - - -// Test empty initializer for union. -//. -// CHECK: @global_u1 = global %union.U1 zeroinitializer, align 4 -// CHECK: @global_u2 = global %union.U1 { i32 3, [12 x i8] zeroinitializer }, align 4 -// CHECK: @global_u2_from_cast = global { i32, [12 x i8] } { i32 3, [12 x i8] zeroinitializer }, align 4 -// CHECK: @global_s1 = global %struct.S1 zeroinitializer, align 4 -// CHECK: @global_s2 = global %struct.S1 { i32 3, %union.U1 zeroinitializer }, align 4 -// CHECK: @global_s3 = global %struct.S1 { i32 3, %union.U1 { i32 6, [12 x i8] zeroinitializer } }, align 4 -// CHECK: @global_const_u1 = constant %union.U1 { i32 4, [12 x i8] zeroinitializer }, align 4 -// CHECK: @global_s3_from_const_u1 = global %struct.S1 { i32 0, %union.U1 { i32 4, [12 x i8] zeroinitializer } }, align 4 -// CHECK: @global_u3 = global %union.U2 zeroinitializer, align 32 -// CHECK: @global_s4 = global { i32, [4 x i8], i64, [8 x i8], [8 x i8] } zeroinitializer, align 32 -// CHECK: @global_s5 = global { i32, [4 x i8], i64, [8 x i8], [8 x i8] } { i32 1, [4 x i8] zeroinitializer, i64 0, [8 x i8] zeroinitializer, [8 x i8] zeroinitializer }, align 32 -// CHECK: @test2.a = internal global %union.U1 zeroinitializer, align 4 -// CHECK: @__const.test3.a = private unnamed_addr constant %union.U1 { i32 3, [12 x i8] zeroinitializer }, align 4 -// CHECK: @test4.a = internal global %union.U1 { i32 3, [12 x i8] zeroinitializer }, align 4 -// CHECK: @test6.s = internal global %struct.S1 zeroinitializer, align 4 -// CHECK: @__const.test7.s = private unnamed_addr constant %struct.S1 { i32 3, %union.U1 zeroinitializer }, align 4 -// CHECK: @test8.s = internal global %struct.S1 { i32 3, %union.U1 zeroinitializer }, align 4 -// CHECK: @__const.test9.s = private unnamed_addr constant %struct.S1 { i32 3, %union.U1 { i32 6, [12 x i8] zeroinitializer } }, align 4 -// CHECK: @test10.s = internal global %struct.S1 { i32 3, %union.U1 { i32 6, [12 x i8] zeroinitializer } }, align 4 -// CHECK: @test12.a = internal global %union.U2 zeroinitializer, align 32 -// CHECK: @test14.s = internal global { i32, [4 x i8], i64, [8 x i8], [8 x i8] } zeroinitializer, align 32 -// CHECK: 
@__const.test15.s = private unnamed_addr constant { i32, [4 x i8], i64, [8 x i8], [8 x i8] } { i32 1, [4 x i8] zeroinitializer, i64 0, [8 x i8] zeroinitializer, [8 x i8] zeroinitializer }, align 32 -// CHECK: @test16.s = internal global { i32, [4 x i8], i64, [8 x i8], [8 x i8] } { i32 1, [4 x i8] zeroinitializer, i64 0, [8 x i8] zeroinitializer, [8 x i8] zeroinitializer }, align 32 -//. -// CHECK-LABEL: define dso_local void @test1( -// CHECK-SAME: ) #[[ATTR0:[0-9]+]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[A:%.*]] = alloca [[UNION_U1:%.*]], align 4 -// CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 4 [[A]], i8 0, i64 16, i1 false) -// CHECK-NEXT: ret void -// -void test1() { - union U1 a = {}; -} - -// Test empty initializer for union. Use static variable. -// CHECK-LABEL: define dso_local void @test2( -// CHECK-SAME: ) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: ret void -// -void test2() { - static union U1 a = {}; -} - -// Test only initializing a small field for union. -// CHECK-LABEL: define dso_local void @test3( -// CHECK-SAME: ) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[A:%.*]] = alloca [[UNION_U1:%.*]], align 4 -// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[A]], ptr align 4 @__const.test3.a, i64 16, i1 false) -// CHECK-NEXT: ret void -// -void test3() { - union U1 a = {3}; -} - -// Test only initializing a small field for union. Use static variable. -// CHECK-LABEL: define dso_local void @test4( -// CHECK-SAME: ) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: ret void -// -void test4() { - static union U1 a = {3}; -} - -// Test union in struct. Use empty initializer for the struct. -// CHECK-LABEL: define dso_local void @test5( -// CHECK-SAME: ) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[S:%.*]] = alloca [[STRUCT_S1:%.*]], align 4 -// CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 4 [[S]], i8 0, i64 20, i1 false) -// CHECK-NEXT: ret void -// -void test5() { - struct S1 s = {}; -} - -// Test union in struct. Use empty initializer for the struct. Use static variable. -// CHECK-LABEL: define dso_local void @test6( -// CHECK-SAME: ) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: ret void -// -void test6() { - static struct S1 s = {}; -} - -// Test union in struct. Initialize other fields of the struct. -// CHECK-LABEL: define dso_local void @test7( -// CHECK-SAME: ) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[S:%.*]] = alloca [[STRUCT_S1:%.*]], align 4 -// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[S]], ptr align 4 @__const.test7.s, i64 20, i1 false) -// CHECK-NEXT: ret void -// -void test7() { - struct S1 s = { - .x = 3, - }; -} - -// Test union in struct. Initialize other fields of the struct. Use static variable. -// CHECK-LABEL: define dso_local void @test8( -// CHECK-SAME: ) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: ret void -// -void test8() { - static struct S1 s = { - .x = 3, - }; -} - -// Test union in struct. Initialize a small field for union. -// CHECK-LABEL: define dso_local void @test9( -// CHECK-SAME: ) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[S:%.*]] = alloca [[STRUCT_S1:%.*]], align 4 -// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[S]], ptr align 4 @__const.test9.s, i64 20, i1 false) -// CHECK-NEXT: ret void -// -void test9() { - struct S1 s = {.x = 3, - .y = { - .x = 6, - }}; -} - -// Test union in struct. Initialize a small field for union. Use static variable. 
-// CHECK-LABEL: define dso_local void @test10( -// CHECK-SAME: ) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: ret void -// -void test10() { - static struct S1 s = {.x = 3, - .y = { - .x = 6, - }}; -} - -// Test empty initializer for union with padding. -// CHECK-LABEL: define dso_local void @test11( -// CHECK-SAME: ) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[A:%.*]] = alloca [[UNION_U2:%.*]], align 32 -// CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 32 [[A]], i8 0, i64 32, i1 false) -// CHECK-NEXT: ret void -// -void test11() { - union U2 a = {}; -} - -// Test empty initializer for union with padding. Use static variable. -// CHECK-LABEL: define dso_local void @test12( -// CHECK-SAME: ) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: ret void -// -void test12() { - static union U2 a = {}; -} - -// Test empty initializer for struct with padding. -// CHECK-LABEL: define dso_local void @test13( -// CHECK-SAME: ) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[S:%.*]] = alloca [[STRUCT_S2:%.*]], align 32 -// CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 32 [[S]], i8 0, i64 32, i1 false) -// CHECK-NEXT: ret void -// -void test13() { - struct S2 s = {}; -} - -// Test empty initializer for struct with padding. Use static variable. -// CHECK-LABEL: define dso_local void @test14( -// CHECK-SAME: ) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: ret void -// -void test14() { - static struct S2 s = {}; -} - -// Test partial initialization for struct with padding. -// CHECK-LABEL: define dso_local void @test15( -// CHECK-SAME: ) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[S:%.*]] = alloca [[STRUCT_S2:%.*]], align 32 -// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 32 [[S]], ptr align 32 @__const.test15.s, i64 32, i1 false) -// CHECK-NEXT: ret void -// -void test15() { - struct S2 s = {.x = 1}; -} - -// Test partial initialization for struct with padding. Use static variable. -// CHECK-LABEL: define dso_local void @test16( -// CHECK-SAME: ) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: ret void -// -void test16() { - static struct S2 s = {.x = 1}; -} -//. -// CHECK: attributes #[[ATTR0]] = { noinline nounwind optnone "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+cx8,+mmx,+sse,+sse2,+x87" } -// CHECK: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: write) } -// CHECK: attributes #[[ATTR2:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) } -//. -// CHECK: [[META0:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} -// CHECK: [[META1:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"} -//. 
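The kernel-specific tests deleted above and below pinned initializer padding to zero; the updated tests elsewhere in this patch show what replaces that behavior. A minimal sketch of one plausible lowering after this change (the struct and variable names are invented for illustration, not taken from the tests):

    // On a typical 64-bit target there are 3 bytes of padding between
    // `c` and `i`.  The deleted tests required those bytes to be zeroed;
    // after this change the padding is left undef, either as an explicit
    // `undef` padding field or by using the named struct type with no
    // explicit padding field at all:
    //
    //   old: @g = global { i8, [3 x i8], i32 } { i8 1, [3 x i8] zeroinitializer, i32 0 }
    //   new: @g = global %struct.S { i8 1, i32 0 }   ; padding implicitly undef
    struct S {
      char c;
      int i;
    };
    struct S g = {1}; // only `c` is explicitly initialized; `i` becomes 0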
diff --git a/clang/test/CodeGen/linux-kernel-struct-union-initializer2.c b/clang/test/CodeGen/linux-kernel-struct-union-initializer2.c deleted file mode 100644 index 0a1ad3a369eac..0000000000000 --- a/clang/test/CodeGen/linux-kernel-struct-union-initializer2.c +++ /dev/null @@ -1,140 +0,0 @@ -// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5 -// RUN: %clang_cc1 -triple x86_64-unknown-unknown -std=gnu11 -verify -emit-llvm %s -o - | FileCheck %s -// expected-no-diagnostics - -union U1 { - int x; - char y[5]; -}; - -struct S1 { - int x; - long long y; -}; - -struct S2 { - unsigned char b1 : 3; // 1st 3 bits (in 1st byte) are b1 - unsigned char : 2; // next 2 bits (in 1st byte) are blocked out as unused - unsigned char b2 : 6; // 6 bits for b2 - doesn't fit into the 1st byte => starts a 2nd - unsigned char b3 : 2; // 2 bits for b3 - next (and final) bits in the 2nd byte - int i; -}; - -struct S3 { - int x; -} __attribute__((__aligned__(8))); - -struct S4 { - int a; - union U1 b; -}; - -// Test non-const initializer for union with padding. -// CHECK-LABEL: define dso_local void @test1( -// CHECK-SAME: i32 noundef [[X:%.*]]) #[[ATTR0:[0-9]+]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[X_ADDR:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[A:%.*]] = alloca [[UNION_U1:%.*]], align 4 -// CHECK-NEXT: store i32 [[X]], ptr [[X_ADDR]], align 4 -// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[X_ADDR]], align 4 -// CHECK-NEXT: store i32 [[TMP0]], ptr [[A]], align 4 -// CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[A]], i64 4 -// CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 4 [[TMP1]], i8 0, i64 4, i1 false) -// CHECK-NEXT: ret void -// -void test1(int x) { - union U1 a = {x}; -} - -// Test non-const initializer for struct with padding. -// CHECK-LABEL: define dso_local void @test2( -// CHECK-SAME: i64 noundef [[Y:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[Y_ADDR:%.*]] = alloca i64, align 8 -// CHECK-NEXT: [[S:%.*]] = alloca [[STRUCT_S1:%.*]], align 8 -// CHECK-NEXT: store i64 [[Y]], ptr [[Y_ADDR]], align 8 -// CHECK-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_S1]], ptr [[S]], i32 0, i32 0 -// CHECK-NEXT: store i32 0, ptr [[X]], align 8 -// CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[S]], i64 4 -// CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 4 [[TMP0]], i8 0, i64 4, i1 false) -// CHECK-NEXT: [[Y1:%.*]] = getelementptr inbounds nuw [[STRUCT_S1]], ptr [[S]], i32 0, i32 1 -// CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[Y_ADDR]], align 8 -// CHECK-NEXT: store i64 [[TMP1]], ptr [[Y1]], align 8 -// CHECK-NEXT: ret void -// -void test2(long long y) { - struct S1 s = {.y = y}; -} - -// Test non-const initializer for struct with padding and bit fields. 
-// CHECK-LABEL: define dso_local void @test3( -// CHECK-SAME: i8 noundef zeroext [[B:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[B_ADDR:%.*]] = alloca i8, align 1 -// CHECK-NEXT: [[S:%.*]] = alloca [[STRUCT_S2:%.*]], align 4 -// CHECK-NEXT: store i8 [[B]], ptr [[B_ADDR]], align 1 -// CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[B_ADDR]], align 1 -// CHECK-NEXT: [[TMP1:%.*]] = zext i8 [[TMP0]] to i16 -// CHECK-NEXT: [[BF_LOAD:%.*]] = load i16, ptr [[S]], align 4 -// CHECK-NEXT: [[BF_VALUE:%.*]] = and i16 [[TMP1]], 7 -// CHECK-NEXT: [[BF_CLEAR:%.*]] = and i16 [[BF_LOAD]], -8 -// CHECK-NEXT: [[BF_SET:%.*]] = or i16 [[BF_CLEAR]], [[BF_VALUE]] -// CHECK-NEXT: store i16 [[BF_SET]], ptr [[S]], align 4 -// CHECK-NEXT: [[BF_LOAD1:%.*]] = load i16, ptr [[S]], align 4 -// CHECK-NEXT: [[BF_CLEAR2:%.*]] = and i16 [[BF_LOAD1]], -16129 -// CHECK-NEXT: [[BF_SET3:%.*]] = or i16 [[BF_CLEAR2]], 0 -// CHECK-NEXT: store i16 [[BF_SET3]], ptr [[S]], align 4 -// CHECK-NEXT: [[BF_LOAD4:%.*]] = load i16, ptr [[S]], align 4 -// CHECK-NEXT: [[BF_CLEAR5:%.*]] = and i16 [[BF_LOAD4]], 16383 -// CHECK-NEXT: [[BF_SET6:%.*]] = or i16 [[BF_CLEAR5]], 0 -// CHECK-NEXT: store i16 [[BF_SET6]], ptr [[S]], align 4 -// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[S]], i64 2 -// CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 2 [[TMP2]], i8 0, i64 2, i1 false) -// CHECK-NEXT: [[I:%.*]] = getelementptr inbounds nuw [[STRUCT_S2]], ptr [[S]], i32 0, i32 1 -// CHECK-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK-NEXT: ret void -// -void test3(unsigned char b) { - struct S2 s = {.b1 = b}; -} - -// Test non-const initializer for struct with padding at the end of the struct. -// CHECK-LABEL: define dso_local void @test4( -// CHECK-SAME: i32 noundef [[X:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[X_ADDR:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[S:%.*]] = alloca [[STRUCT_S3:%.*]], align 8 -// CHECK-NEXT: store i32 [[X]], ptr [[X_ADDR]], align 4 -// CHECK-NEXT: [[X1:%.*]] = getelementptr inbounds nuw [[STRUCT_S3]], ptr [[S]], i32 0, i32 0 -// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[X_ADDR]], align 4 -// CHECK-NEXT: store i32 [[TMP0]], ptr [[X1]], align 8 -// CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[S]], i64 4 -// CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 4 [[TMP1]], i8 0, i64 4, i1 false) -// CHECK-NEXT: ret void -// -void test4(int x) { - struct S3 s = {x}; -} - -// Test non-const initializer for union in struct. 
-// CHECK-LABEL: define dso_local void @test5( -// CHECK-SAME: i32 noundef [[A:%.*]], i32 noundef [[B:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[S:%.*]] = alloca [[STRUCT_S4:%.*]], align 4 -// CHECK-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4 -// CHECK-NEXT: [[A1:%.*]] = getelementptr inbounds nuw [[STRUCT_S4]], ptr [[S]], i32 0, i32 0 -// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK-NEXT: store i32 [[TMP0]], ptr [[A1]], align 4 -// CHECK-NEXT: [[B2:%.*]] = getelementptr inbounds nuw [[STRUCT_S4]], ptr [[S]], i32 0, i32 1 -// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK-NEXT: store i32 [[TMP1]], ptr [[B2]], align 4 -// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[B2]], i64 4 -// CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 4 [[TMP2]], i8 0, i64 4, i1 false) -// CHECK-NEXT: ret void -// -void test5(int a, int b) { - struct S4 s = {a, {b}}; -} diff --git a/clang/test/CodeGen/mingw-long-double.c b/clang/test/CodeGen/mingw-long-double.c index 0fc8f01509682..4be97526f9631 100644 --- a/clang/test/CodeGen/mingw-long-double.c +++ b/clang/test/CodeGen/mingw-long-double.c @@ -11,9 +11,12 @@ struct { char c; long double ldb; } agggregate_LD = {}; -// GNU32: @agggregate_LD = dso_local global { i8, [3 x i8], x86_fp80 } zeroinitializer, align 4 -// GNU64: @agggregate_LD = dso_local global { i8, [15 x i8], x86_fp80 } zeroinitializer, align 16 -// MSC64: @agggregate_LD = dso_local global { i8, [7 x i8], double } zeroinitializer, align 8 +// GNU32: %struct.anon = type { i8, x86_fp80 } +// GNU32: @agggregate_LD = dso_local global %struct.anon zeroinitializer, align 4 +// GNU64: %struct.anon = type { i8, x86_fp80 } +// GNU64: @agggregate_LD = dso_local global %struct.anon zeroinitializer, align 16 +// MSC64: %struct.anon = type { i8, double } +// MSC64: @agggregate_LD = dso_local global %struct.anon zeroinitializer, align 8 long double dataLD = 1.0L; // GNU32: @dataLD = dso_local global x86_fp80 0xK3FFF8000000000000000, align 4 diff --git a/clang/test/CodeGen/mms-bitfields.c b/clang/test/CodeGen/mms-bitfields.c index 2ccce326c7131..49c5c1c3e7d40 100644 --- a/clang/test/CodeGen/mms-bitfields.c +++ b/clang/test/CodeGen/mms-bitfields.c @@ -61,5 +61,5 @@ union HEADER { struct Inner variable = { 1,0,1, 21 }; union HEADER hdr = {{1,2,3,4}}; -// CHECK: @variable ={{.*}} global { i8, [3 x i8], i8, i8, i8, i8 } { i8 5, [3 x i8] zeroinitializer, i8 21, i8 0, i8 0, i8 0 }, align 1 -// CHECK: @hdr ={{.*}} global { { i8, i8, [2 x i8], i8, i8, i8, i8, i8, [3 x i8] } } { { i8, i8, [2 x i8], i8, i8, i8, i8, i8, [3 x i8] } { i8 8, i8 0, [2 x i8] zeroinitializer, i8 2, i8 0, i8 0, i8 3, i8 4, [3 x i8] zeroinitializer } }, align 1 +// CHECK: @variable ={{.*}} global { i8, [3 x i8], i8, i8, i8, i8 } { i8 5, [3 x i8] undef, i8 21, i8 0, i8 0, i8 0 }, align 1 +// CHECK: @hdr ={{.*}} global { { i8, i8, [2 x i8], i8, i8, i8, i8, i8, [3 x i8] } } { { i8, i8, [2 x i8], i8, i8, i8, i8, i8, [3 x i8] } { i8 8, i8 0, [2 x i8] undef, i8 2, i8 0, i8 0, i8 3, i8 4, [3 x i8] undef } }, align 1 diff --git a/clang/test/CodeGen/union-init2.c b/clang/test/CodeGen/union-init2.c index ee35e78a4f301..048ff00517b4e 100644 --- a/clang/test/CodeGen/union-init2.c +++ b/clang/test/CodeGen/union-init2.c @@ -2,11 +2,11 @@ // RUN: %clang_cc1 -x c++ %s -emit-llvm -triple x86_64-linux-gnu -o - | FileCheck 
%s --check-prefixes=CHECK-CXX // Make sure we generate something sane instead of a ptrtoint -// CHECK: @r, [4 x i8] zeroinitializer +// CHECK: @r, [4 x i8] undef union x {long long b;union x* a;} r = {.a = &r}; -// CHECK: global { [3 x i8], [5 x i8] } zeroinitializer +// CHECK: global { [3 x i8], [5 x i8] } { [3 x i8] zeroinitializer, [5 x i8] undef } union z { char a[3]; long long b; diff --git a/clang/test/CodeGen/windows-swiftcall.c b/clang/test/CodeGen/windows-swiftcall.c index 41569c2606622..bc7832d9d3ac2 100644 --- a/clang/test/CodeGen/windows-swiftcall.c +++ b/clang/test/CodeGen/windows-swiftcall.c @@ -5,6 +5,8 @@ #define ERROR __attribute__((swift_error_result)) #define CONTEXT __attribute__((swift_context)) +// CHECK: [[STRUCT2_RESULT:@.*]] = private {{.*}} constant [[STRUCT2_TYPE:%.*]] { i32 0, i8 0, i8 undef, i8 0, i32 0, i32 0 } + /*****************************************************************************/ /****************************** PARAMETER ABIS *******************************/ /*****************************************************************************/ @@ -140,8 +142,8 @@ typedef struct { } struct_2; TEST(struct_2); // CHECK-LABEL: define dso_local swiftcc { i64, i64 } @return_struct_2() {{.*}}{ -// CHECK: [[RET:%.*]] = alloca [[STRUCT2:%.*]], align 4 -// CHECK: call void @llvm.memset +// CHECK: [[RET:%.*]] = alloca [[STRUCT2_TYPE]], align 4 +// CHECK: call void @llvm.memcpy{{.*}}({{.*}}[[RET]], {{.*}}[[STRUCT2_RESULT]] // CHECK: [[GEP0:%.*]] = getelementptr inbounds nuw { i64, i64 }, ptr [[RET]], i32 0, i32 0 // CHECK: [[T0:%.*]] = load i64, ptr [[GEP0]], align 4 // CHECK: [[GEP1:%.*]] = getelementptr inbounds nuw { i64, i64 }, ptr [[RET]], i32 0, i32 1 @@ -151,7 +153,7 @@ TEST(struct_2); // CHECK: ret { i64, i64 } [[R1]] // CHECK: } // CHECK-LABEL: define dso_local swiftcc void @take_struct_2(i64 %0, i64 %1) {{.*}}{ -// CHECK: [[V:%.*]] = alloca [[STRUCT2]], align 4 +// CHECK: [[V:%.*]] = alloca [[STRUCT:%.*]], align 4 // CHECK: [[GEP0:%.*]] = getelementptr inbounds nuw { i64, i64 }, ptr [[V]], i32 0, i32 0 // CHECK: store i64 %0, ptr [[GEP0]], align 4 // CHECK: [[GEP1:%.*]] = getelementptr inbounds nuw { i64, i64 }, ptr [[V]], i32 0, i32 1 @@ -159,7 +161,7 @@ TEST(struct_2); // CHECK: ret void // CHECK: } // CHECK-LABEL: define dso_local void @test_struct_2() {{.*}} { -// CHECK: [[TMP:%.*]] = alloca [[STRUCT2]], align 4 +// CHECK: [[TMP:%.*]] = alloca [[STRUCT2_TYPE]], align 4 // CHECK: [[CALL:%.*]] = call swiftcc { i64, i64 } @return_struct_2() // CHECK: [[GEP:%.*]] = getelementptr inbounds nuw {{.*}} [[TMP]], i32 0, i32 0 // CHECK: [[T0:%.*]] = extractvalue { i64, i64 } [[CALL]], 0 @@ -232,7 +234,7 @@ typedef union { TEST(union_het_fp) // CHECK-LABEL: define dso_local swiftcc i64 @return_union_het_fp() // CHECK: [[RET:%.*]] = alloca [[UNION:%.*]], align 8 -// CHECK: call void @llvm.memset{{.*}}(ptr align {{[0-9]+}} [[RET]] +// CHECK: call void @llvm.memcpy{{.*}}(ptr align {{[0-9]+}} [[RET]] // CHECK: [[GEP:%.*]] = getelementptr inbounds nuw { i64 }, ptr [[RET]], i32 0, i32 0 // CHECK: [[R0:%.*]] = load i64, ptr [[GEP]], align 8 // CHECK: ret i64 [[R0]] diff --git a/clang/test/CodeGenObjC/designated-initializers.m b/clang/test/CodeGenObjC/designated-initializers.m index ce58f6c367338..a67f82e1afbea 100644 --- a/clang/test/CodeGenObjC/designated-initializers.m +++ b/clang/test/CodeGenObjC/designated-initializers.m @@ -4,4 +4,4 @@ char L[3]; int M; } overwrite_string[] = { { { @encode(void**) }, 1 }, [0].L[1] = 'x'}; -// CHECK: [3 x i8] c"^xv", i8 0, i32 1 +// 
CHECK: [3 x i8] c"^xv", i32 1 From 81dac7d613c945cf56925162081eaf4a998cea8a Mon Sep 17 00:00:00 2001 From: harishch4 Date: Wed, 25 Sep 2024 09:29:44 +0530 Subject: [PATCH 083/212] [Flang][OpenMP] Add Semantic Checks for Atomic Capture Construct (#108516) This PR adds semantic checks to ensure the atomic capture construct conforms to one of the valid forms: [capture-stmt, update-stmt], [capture-stmt, write-stmt] or [update-stmt, capture-stmt]. Functions checkForSymbolMatch and checkForSingleVariableOnRHS are moved from flang/lib/Lower/DirectivesCommon.h to flang/Semantics/tools.h for reuse. --------- Co-authored-by: Kiran Chandramohan --- flang/include/flang/Semantics/tools.h | 29 +++++++++ flang/lib/Lower/DirectivesCommon.h | 34 +--------- flang/lib/Semantics/check-omp-structure.cpp | 64 +++++++++++++++++-- flang/lib/Semantics/check-omp-structure.h | 1 + .../OpenMP/omp-atomic-assignment-stmt.f90 | 64 +++++++++++++++++++ .../Semantics/OpenMP/requires-atomic01.f90 | 6 +- .../Semantics/OpenMP/requires-atomic02.f90 | 6 +- 7 files changed, 161 insertions(+), 43 deletions(-) diff --git a/flang/include/flang/Semantics/tools.h b/flang/include/flang/Semantics/tools.h index 15c02ecc0058c..96d4dbb2acaa1 100644 --- a/flang/include/flang/Semantics/tools.h +++ b/flang/include/flang/Semantics/tools.h @@ -736,5 +736,34 @@ std::string GetCommonBlockObjectName(const Symbol &, bool underscoring); // Check for ambiguous USE associations bool HadUseError(SemanticsContext &, SourceName at, const Symbol *); +/// Checks if the assignment statement has a single variable on the RHS. +inline bool checkForSingleVariableOnRHS( + const Fortran::parser::AssignmentStmt &assignmentStmt) { + const Fortran::parser::Expr &expr{ + std::get(assignmentStmt.t)}; + const Fortran::common::Indirection *designator = + std::get_if>( + &expr.u); + return designator != nullptr; +} + +/// Checks if the symbol on the LHS of the assignment statement is present in +/// the RHS expression. +inline bool checkForSymbolMatch( + const Fortran::parser::AssignmentStmt &assignmentStmt) { + const auto &var{std::get(assignmentStmt.t)}; + const auto &expr{std::get(assignmentStmt.t)}; + const auto *e{Fortran::semantics::GetExpr(expr)}; + const auto *v{Fortran::semantics::GetExpr(var)}; + auto varSyms{Fortran::evaluate::GetSymbolVector(*v)}; + const Fortran::semantics::Symbol &varSymbol{*varSyms.front()}; + for (const Fortran::semantics::Symbol &symbol : + Fortran::evaluate::GetSymbolVector(*e)) { + if (varSymbol == symbol) { + return true; + } + } + return false; +} } // namespace Fortran::semantics #endif // FORTRAN_SEMANTICS_TOOLS_H_ diff --git a/flang/lib/Lower/DirectivesCommon.h b/flang/lib/Lower/DirectivesCommon.h index d2060e77ce530..a32f0b287e049 100644 --- a/flang/lib/Lower/DirectivesCommon.h +++ b/flang/lib/Lower/DirectivesCommon.h @@ -74,34 +74,6 @@ struct AddrAndBoundsInfo { } }; -/// Checks if the assignment statement has a single variable on the RHS. -static inline bool checkForSingleVariableOnRHS( - const Fortran::parser::AssignmentStmt &assignmentStmt) { - const Fortran::parser::Expr &expr{ - std::get(assignmentStmt.t)}; - const Fortran::common::Indirection *designator = - std::get_if>( - &expr.u); - return designator != nullptr; -} - -/// Checks if the symbol on the LHS of the assignment statement is present in -/// the RHS expression. 
-static inline bool -checkForSymbolMatch(const Fortran::parser::AssignmentStmt &assignmentStmt) { - const auto &var{std::get(assignmentStmt.t)}; - const auto &expr{std::get(assignmentStmt.t)}; - const auto *e{Fortran::semantics::GetExpr(expr)}; - const auto *v{Fortran::semantics::GetExpr(var)}; - auto varSyms{Fortran::evaluate::GetSymbolVector(*v)}; - const Fortran::semantics::Symbol &varSymbol{*varSyms.front()}; - for (const Fortran::semantics::Symbol &symbol : - Fortran::evaluate::GetSymbolVector(*e)) - if (varSymbol == symbol) - return true; - return false; -} - /// Populates \p hint and \p memoryOrder with appropriate clause information /// if present on atomic construct. static inline void genOmpAtomicHintAndMemoryOrderClauses( @@ -537,7 +509,7 @@ void genOmpAccAtomicCapture(Fortran::lower::AbstractConverter &converter, stmt2LHSArg = fir::getBase(converter.genExprAddr(assign2.lhs, stmtCtx)); // Operation specific RHS evaluations - if (checkForSingleVariableOnRHS(stmt1)) { + if (Fortran::semantics::checkForSingleVariableOnRHS(stmt1)) { // Atomic capture construct is of the form [capture-stmt, update-stmt] or // of the form [capture-stmt, write-stmt] stmt1RHSArg = fir::getBase(converter.genExprAddr(assign1.rhs, stmtCtx)); @@ -573,8 +545,8 @@ void genOmpAccAtomicCapture(Fortran::lower::AbstractConverter &converter, firOpBuilder.createBlock(&(atomicCaptureOp->getRegion(0))); mlir::Block &block = atomicCaptureOp->getRegion(0).back(); firOpBuilder.setInsertionPointToStart(&block); - if (checkForSingleVariableOnRHS(stmt1)) { - if (checkForSymbolMatch(stmt2)) { + if (Fortran::semantics::checkForSingleVariableOnRHS(stmt1)) { + if (Fortran::semantics::checkForSymbolMatch(stmt2)) { // Atomic capture construct is of the form [capture-stmt, update-stmt] const Fortran::semantics::SomeExpr &fromExpr = *Fortran::semantics::GetExpr(stmt1Expr); diff --git a/flang/lib/Semantics/check-omp-structure.cpp b/flang/lib/Semantics/check-omp-structure.cpp index 643b713b32e29..dfc3f3290a81b 100644 --- a/flang/lib/Semantics/check-omp-structure.cpp +++ b/flang/lib/Semantics/check-omp-structure.cpp @@ -1977,6 +1977,58 @@ void OmpStructureChecker::CheckAtomicUpdateStmt( ErrIfAllocatableVariable(var); } +// TODO: Allow cond-update-stmt once compare clause is supported. 
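+// (Illustrative summary, not in the original patch: with a hypothetical
+// atomic variable x and capture target v, the accepted shapes are
+//   capture-stmt then update-stmt:  v = x    followed by  x = x + 1
+//   capture-stmt then write-stmt:   v = x    followed by  x = expr
+//   update-stmt then capture-stmt:  x = x + 1  followed by  v = x
+// where, in the write form, expr does not reference x.  Conditional
+// update forms stay rejected until the `compare` clause is supported,
+// per the TODO above.)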
+void OmpStructureChecker::CheckAtomicCaptureConstruct( + const parser::OmpAtomicCapture &atomicCaptureConstruct) { + const Fortran::parser::AssignmentStmt &stmt1 = + std::get( + atomicCaptureConstruct.t) + .v.statement; + const auto &stmt1Var{std::get(stmt1.t)}; + const auto &stmt1Expr{std::get(stmt1.t)}; + + const Fortran::parser::AssignmentStmt &stmt2 = + std::get( + atomicCaptureConstruct.t) + .v.statement; + const auto &stmt2Var{std::get(stmt2.t)}; + const auto &stmt2Expr{std::get(stmt2.t)}; + + if (Fortran::semantics::checkForSingleVariableOnRHS(stmt1)) { + CheckAtomicCaptureStmt(stmt1); + if (Fortran::semantics::checkForSymbolMatch(stmt2)) { + // ATOMIC CAPTURE construct is of the form [capture-stmt, update-stmt] + CheckAtomicUpdateStmt(stmt2); + } else { + // ATOMIC CAPTURE construct is of the form [capture-stmt, write-stmt] + CheckAtomicWriteStmt(stmt2); + } + auto *v{stmt2Var.typedExpr.get()}; + auto *e{stmt1Expr.typedExpr.get()}; + if (v && e && !(v->v == e->v)) { + context_.Say(stmt1Expr.source, + "Captured variable/array element/derived-type component %s expected to be assigned in the second statement of ATOMIC CAPTURE construct"_err_en_US, + stmt1Expr.source); + } + } else if (Fortran::semantics::checkForSymbolMatch(stmt1) && + Fortran::semantics::checkForSingleVariableOnRHS(stmt2)) { + // ATOMIC CAPTURE construct is of the form [update-stmt, capture-stmt] + CheckAtomicUpdateStmt(stmt1); + CheckAtomicCaptureStmt(stmt2); + // Variable updated in stmt1 should be captured in stmt2 + auto *v{stmt1Var.typedExpr.get()}; + auto *e{stmt2Expr.typedExpr.get()}; + if (v && e && !(v->v == e->v)) { + context_.Say(stmt1Var.GetSource(), + "Updated variable/array element/derived-type component %s expected to be captured in the second statement of ATOMIC CAPTURE construct"_err_en_US, + stmt1Var.GetSource()); + } + } else { + context_.Say(stmt1Expr.source, + "Invalid ATOMIC CAPTURE construct statements. 
Expected one of [update-stmt, capture-stmt], [capture-stmt, update-stmt], or [capture-stmt, write-stmt]"_err_en_US); + } +} + void OmpStructureChecker::CheckAtomicMemoryOrderClause( const parser::OmpAtomicClauseList *leftHandClauseList, const parser::OmpAtomicClauseList *rightHandClauseList) { @@ -2060,15 +2112,15 @@ void OmpStructureChecker::Enter(const parser::OpenMPAtomicConstruct &x) { atomicWrite.t) .statement); }, - [&](const auto &atomicConstruct) { - const auto &dir{std::get(atomicConstruct.t)}; + [&](const parser::OmpAtomicCapture &atomicCapture) { + const auto &dir{std::get(atomicCapture.t)}; PushContextAndClauseSets( dir.source, llvm::omp::Directive::OMPD_atomic); - CheckAtomicMemoryOrderClause(&std::get<0>(atomicConstruct.t), - &std::get<2>(atomicConstruct.t)); + CheckAtomicMemoryOrderClause( + &std::get<0>(atomicCapture.t), &std::get<2>(atomicCapture.t)); CheckHintClause( - &std::get<0>(atomicConstruct.t), - &std::get<2>(atomicConstruct.t)); + &std::get<0>(atomicCapture.t), &std::get<2>(atomicCapture.t)); + CheckAtomicCaptureConstruct(atomicCapture); }, }, x.u); diff --git a/flang/lib/Semantics/check-omp-structure.h b/flang/lib/Semantics/check-omp-structure.h index 2cc1a78068f54..8bfd4d594b028 100644 --- a/flang/lib/Semantics/check-omp-structure.h +++ b/flang/lib/Semantics/check-omp-structure.h @@ -193,6 +193,7 @@ class OmpStructureChecker void CheckAtomicUpdateStmt(const parser::AssignmentStmt &); void CheckAtomicCaptureStmt(const parser::AssignmentStmt &); void CheckAtomicWriteStmt(const parser::AssignmentStmt &); + void CheckAtomicCaptureConstruct(const parser::OmpAtomicCapture &); void CheckAtomicConstructStructure(const parser::OpenMPAtomicConstruct &); void CheckDistLinear(const parser::OpenMPLoopConstruct &x); void CheckSIMDNest(const parser::OpenMPConstruct &x); diff --git a/flang/test/Semantics/OpenMP/omp-atomic-assignment-stmt.f90 b/flang/test/Semantics/OpenMP/omp-atomic-assignment-stmt.f90 index a346056dee383..0d4da5485af04 100644 --- a/flang/test/Semantics/OpenMP/omp-atomic-assignment-stmt.f90 +++ b/flang/test/Semantics/OpenMP/omp-atomic-assignment-stmt.f90 @@ -84,4 +84,68 @@ program sample !$omp atomic write !ERROR: Expected scalar variable on the LHS of atomic assignment statement a = x + + !$omp atomic capture + v = x + x = x + 1 + !$omp end atomic + + !$omp atomic release capture + v = x + !ERROR: Atomic update statement should be of form `x = x operator expr` OR `x = expr operator x` + x = b + (x*1) + !$omp end atomic + + !$omp atomic capture hint(0) + v = x + x = 1 + !$omp end atomic + + !$omp atomic capture + !ERROR: Captured variable/array element/derived-type component x expected to be assigned in the second statement of ATOMIC CAPTURE construct + v = x + b = b + 1 + !$omp end atomic + + !$omp atomic capture + !ERROR: Captured variable/array element/derived-type component x expected to be assigned in the second statement of ATOMIC CAPTURE construct + v = x + b = 10 + !$omp end atomic + + !$omp atomic capture + !ERROR: Updated variable/array element/derived-type component x expected to be captured in the second statement of ATOMIC CAPTURE construct + x = x + 10 + v = b + !$omp end atomic + + !$omp atomic capture + !ERROR: Invalid ATOMIC CAPTURE construct statements. 
Expected one of [update-stmt, capture-stmt], [capture-stmt, update-stmt], or [capture-stmt, write-stmt] + v = 1 + x = 4 + !$omp end atomic + + !$omp atomic capture + !ERROR: Captured variable/array element/derived-type component z%y expected to be assigned in the second statement of ATOMIC CAPTURE construct + x = z%y + z%m = z%m + 1.0 + !$omp end atomic + + !$omp atomic capture + !ERROR: Updated variable/array element/derived-type component z%m expected to be captured in the second statement of ATOMIC CAPTURE construct + z%m = z%m + 1.0 + x = z%y + !$omp end atomic + + !$omp atomic capture + !ERROR: Captured variable/array element/derived-type component y(2) expected to be assigned in the second statement of ATOMIC CAPTURE construct + x = y(2) + y(1) = y(1) + 1 + !$omp end atomic + + !$omp atomic capture + !ERROR: Updated variable/array element/derived-type component y(1) expected to be captured in the second statement of ATOMIC CAPTURE construct + y(1) = y(1) + 1 + x = y(2) + !$omp end atomic end program diff --git a/flang/test/Semantics/OpenMP/requires-atomic01.f90 b/flang/test/Semantics/OpenMP/requires-atomic01.f90 index b39c9cdcc0bb3..cb7b1bc1ac52a 100644 --- a/flang/test/Semantics/OpenMP/requires-atomic01.f90 +++ b/flang/test/Semantics/OpenMP/requires-atomic01.f90 @@ -88,7 +88,7 @@ program requires ! CHECK: OmpMemoryOrderClause -> OmpClause -> SeqCst !$omp atomic capture i = j - i = j + j = j + 1 !$omp end atomic ! CHECK-LABEL: OpenMPAtomicConstruct -> OmpAtomicCapture @@ -96,7 +96,7 @@ program requires ! CHECK: OmpMemoryOrderClause -> OmpClause -> Relaxed !$omp atomic relaxed capture i = j - i = j + j = j + 1 !$omp end atomic ! CHECK-LABEL: OpenMPAtomicConstruct -> OmpAtomicCapture @@ -104,6 +104,6 @@ program requires ! CHECK: OmpMemoryOrderClause -> OmpClause -> Relaxed !$omp atomic capture relaxed i = j - i = j + j = j + 1 !$omp end atomic end program requires diff --git a/flang/test/Semantics/OpenMP/requires-atomic02.f90 b/flang/test/Semantics/OpenMP/requires-atomic02.f90 index 3af83970e7927..5a4249794f7b5 100644 --- a/flang/test/Semantics/OpenMP/requires-atomic02.f90 +++ b/flang/test/Semantics/OpenMP/requires-atomic02.f90 @@ -88,7 +88,7 @@ program requires ! CHECK: OmpMemoryOrderClause -> OmpClause -> AcqRel !$omp atomic capture i = j - i = j + j = j + 1 !$omp end atomic ! CHECK-LABEL: OpenMPAtomicConstruct -> OmpAtomicCapture @@ -96,7 +96,7 @@ program requires ! CHECK: OmpMemoryOrderClause -> OmpClause -> Relaxed !$omp atomic relaxed capture i = j - i = j + j = j + 1 !$omp end atomic ! CHECK-LABEL: OpenMPAtomicConstruct -> OmpAtomicCapture @@ -104,6 +104,6 @@ program requires ! CHECK: OmpMemoryOrderClause -> OmpClause -> Relaxed !$omp atomic capture relaxed i = j - i = j + j = j + 1 !$omp end atomic end program requires From cfe1adc42a5e6a81741f868aff2cf06cec3c24c1 Mon Sep 17 00:00:00 2001 From: Tex Riddell Date: Tue, 24 Sep 2024 21:06:13 -0700 Subject: [PATCH 084/212] Reland: [DirectX] Add atan2 intrinsic and expand for DXIL backend (p1) (#109878) This change is part of this proposal: https://discourse.llvm.org/t/rfc-all-the-math-intrinsics/78294 This preliminary work adds the intrinsic to llvm and expands using atan intrinsic for DXIL backend, since DXIL has no atan2 op. Part 1 for Implement the atan2 HLSL Function #70096. 
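For reference, the expansion computes atan(y/x) and then applies the
standard quadrant corrections through a chain of selects; summarizing
expandAtan2Intrinsic in the diff below (this restates the code, it adds
no new semantics):

\[
\operatorname{atan2}(y, x) =
\begin{cases}
\arctan(y/x)       & x > 0 \\
\arctan(y/x) + \pi & x < 0,\ y \ge 0 \\
\arctan(y/x) - \pi & x < 0,\ y < 0 \\
\pi/2              & x = 0,\ y \ge 0 \\
-\pi/2             & x = 0,\ y < 0
\end{cases}
\]

Because the x == 0, y >= 0 select is applied last, x = 0, y = 0 also
yields pi/2 in this expansion.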
(reland #108865 reverted in #109842 due to doc build break) --- llvm/docs/LangRef.rst | 37 ++++++++ llvm/include/llvm/IR/Intrinsics.td | 1 + .../Target/DirectX/DXILIntrinsicExpansion.cpp | 52 +++++++++++ llvm/test/CodeGen/DirectX/atan2.ll | 87 +++++++++++++++++++ llvm/test/CodeGen/DirectX/atan2_error.ll | 11 +++ 5 files changed, 188 insertions(+) create mode 100644 llvm/test/CodeGen/DirectX/atan2.ll create mode 100644 llvm/test/CodeGen/DirectX/atan2_error.ll diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst index 91c3e60bb0acb..3b905c2788128 100644 --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -15583,6 +15583,43 @@ trapping or setting ``errno``. When specified with the fast-math-flag 'afn', the result may be approximated using a less accurate calculation. +'``llvm.atan2.*``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +This is an overloaded intrinsic. You can use ``llvm.atan2`` on any +floating-point or vector of floating-point type. Not all targets support +all types however. + +:: + + declare float @llvm.atan2.f32(float %X, float %Y) + declare double @llvm.atan2.f64(double %X, double %Y) + declare x86_fp80 @llvm.atan2.f80(x86_fp80 %X, x86_fp80 %Y) + declare fp128 @llvm.atan2.f128(fp128 %X, fp128 %Y) + declare ppc_fp128 @llvm.atan2.ppcf128(ppc_fp128 %X, ppc_fp128 %Y) + +Overview: +""""""""" + +The '``llvm.atan2.*``' intrinsics return the arctangent of the operand. + +Arguments: +"""""""""" + +The arguments and return value are floating-point numbers of the same type. + +Semantics: +"""""""""" + +Return the same value as a corresponding libm '``atan2``' function but without +trapping or setting ``errno``. + +When specified with the fast-math-flag 'afn', the result may be approximated +using a less accurate calculation. 
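+
+For example (an illustrative call; ``%Y`` and ``%X`` stand for any two
+values of the same floating-point type):
+
+::
+
+      %r = call float @llvm.atan2.f32(float %Y, float %X)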
+ '``llvm.sinh.*``' Intrinsic ^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td index 0a74a217a5f01..48d57907e6d0b 100644 --- a/llvm/include/llvm/IR/Intrinsics.td +++ b/llvm/include/llvm/IR/Intrinsics.td @@ -1016,6 +1016,7 @@ let IntrProperties = [IntrNoMem, IntrSpeculatable, IntrWillReturn] in { def int_asin : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>; def int_acos : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>; def int_atan : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>; + def int_atan2 : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, LLVMMatchType<0>]>; def int_sin : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>; def int_cos : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>; def int_tan : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>; diff --git a/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp b/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp index dd73b895b14d3..926cbe97f24fd 100644 --- a/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp +++ b/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp @@ -36,6 +36,7 @@ using namespace llvm; static bool isIntrinsicExpansion(Function &F) { switch (F.getIntrinsicID()) { case Intrinsic::abs: + case Intrinsic::atan2: case Intrinsic::exp: case Intrinsic::log: case Intrinsic::log10: @@ -307,6 +308,54 @@ static Value *expandNormalizeIntrinsic(CallInst *Orig) { return Builder.CreateFMul(X, MultiplicandVec); } +static Value *expandAtan2Intrinsic(CallInst *Orig) { + Value *Y = Orig->getOperand(0); + Value *X = Orig->getOperand(1); + Type *Ty = X->getType(); + IRBuilder<> Builder(Orig); + Builder.setFastMathFlags(Orig->getFastMathFlags()); + + Value *Tan = Builder.CreateFDiv(Y, X); + + CallInst *Atan = + Builder.CreateIntrinsic(Ty, Intrinsic::atan, {Tan}, nullptr, "Elt.Atan"); + Atan->setTailCall(Orig->isTailCall()); + Atan->setAttributes(Orig->getAttributes()); + + // Modify atan result based on https://en.wikipedia.org/wiki/Atan2. + Constant *Pi = ConstantFP::get(Ty, llvm::numbers::pi); + Constant *HalfPi = ConstantFP::get(Ty, llvm::numbers::pi / 2); + Constant *NegHalfPi = ConstantFP::get(Ty, -llvm::numbers::pi / 2); + Constant *Zero = ConstantFP::get(Ty, 0); + Value *AtanAddPi = Builder.CreateFAdd(Atan, Pi); + Value *AtanSubPi = Builder.CreateFSub(Atan, Pi); + + // x > 0 -> atan. + Value *Result = Atan; + Value *XLt0 = Builder.CreateFCmpOLT(X, Zero); + Value *XEq0 = Builder.CreateFCmpOEQ(X, Zero); + Value *YGe0 = Builder.CreateFCmpOGE(Y, Zero); + Value *YLt0 = Builder.CreateFCmpOLT(Y, Zero); + + // x < 0, y >= 0 -> atan + pi. + Value *XLt0AndYGe0 = Builder.CreateAnd(XLt0, YGe0); + Result = Builder.CreateSelect(XLt0AndYGe0, AtanAddPi, Result); + + // x < 0, y < 0 -> atan - pi. 
+ Value *XLt0AndYLt0 = Builder.CreateAnd(XLt0, YLt0); + Result = Builder.CreateSelect(XLt0AndYLt0, AtanSubPi, Result); + + // x == 0, y < 0 -> -pi/2 + Value *XEq0AndYLt0 = Builder.CreateAnd(XEq0, YLt0); + Result = Builder.CreateSelect(XEq0AndYLt0, NegHalfPi, Result); + + // x == 0, y > 0 -> pi/2 + Value *XEq0AndYGe0 = Builder.CreateAnd(XEq0, YGe0); + Result = Builder.CreateSelect(XEq0AndYGe0, HalfPi, Result); + + return Result; +} + static Value *expandPowIntrinsic(CallInst *Orig) { Value *X = Orig->getOperand(0); @@ -418,6 +467,9 @@ static bool expandIntrinsic(Function &F, CallInst *Orig) { case Intrinsic::abs: Result = expandAbs(Orig); break; + case Intrinsic::atan2: + Result = expandAtan2Intrinsic(Orig); + break; case Intrinsic::exp: Result = expandExpIntrinsic(Orig); break; diff --git a/llvm/test/CodeGen/DirectX/atan2.ll b/llvm/test/CodeGen/DirectX/atan2.ll new file mode 100644 index 0000000000000..9d86f87f3ed50 --- /dev/null +++ b/llvm/test/CodeGen/DirectX/atan2.ll @@ -0,0 +1,87 @@ +; RUN: opt -S -dxil-intrinsic-expansion -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s --check-prefixes=CHECK,EXPCHECK +; RUN: opt -S -dxil-intrinsic-expansion -scalarizer -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s --check-prefixes=CHECK,DOPCHECK + +; Make sure correct dxil expansions for atan2 are generated for float and half. + +define noundef float @atan2_float(float noundef %y, float noundef %x) { +entry: +; CHECK: [[DIV:%.+]] = fdiv float %y, %x +; EXPCHECK: [[ATAN:%.+]] = call float @llvm.atan.f32(float [[DIV]]) +; DOPCHECK: [[ATAN:%.+]] = call float @dx.op.unary.f32(i32 17, float [[DIV]]) +; CHECK-DAG: [[ADD_PI:%.+]] = fadd float [[ATAN]], 0x400921FB60000000 +; CHECK-DAG: [[SUB_PI:%.+]] = fsub float [[ATAN]], 0x400921FB60000000 +; CHECK-DAG: [[X_LT_0:%.+]] = fcmp olt float %x, 0.000000e+00 +; CHECK-DAG: [[X_EQ_0:%.+]] = fcmp oeq float %x, 0.000000e+00 +; CHECK-DAG: [[Y_GE_0:%.+]] = fcmp oge float %y, 0.000000e+00 +; CHECK-DAG: [[Y_LT_0:%.+]] = fcmp olt float %y, 0.000000e+00 +; CHECK: [[XLT0_AND_YGE0:%.+]] = and i1 [[X_LT_0]], [[Y_GE_0]] +; CHECK: [[SELECT_ADD_PI:%.+]] = select i1 [[XLT0_AND_YGE0]], float [[ADD_PI]], float [[ATAN]] +; CHECK: [[XLT0_AND_YLT0:%.+]] = and i1 [[X_LT_0]], [[Y_LT_0]] +; CHECK: [[SELECT_SUB_PI:%.+]] = select i1 [[XLT0_AND_YLT0]], float [[SUB_PI]], float [[SELECT_ADD_PI]] +; CHECK: [[XEQ0_AND_YLT0:%.+]] = and i1 [[X_EQ_0]], [[Y_LT_0]] +; CHECK: [[SELECT_NEGHPI:%.+]] = select i1 [[XEQ0_AND_YLT0]], float 0xBFF921FB60000000, float [[SELECT_SUB_PI]] +; CHECK: [[XEQ0_AND_YGE0:%.+]] = and i1 [[X_EQ_0]], [[Y_GE_0]] +; CHECK: [[SELECT_HPI:%.+]] = select i1 [[XEQ0_AND_YGE0]], float 0x3FF921FB60000000, float [[SELECT_NEGHPI]] +; CHECK: ret float [[SELECT_HPI]] + %elt.atan2 = call float @llvm.atan2.f32(float %y, float %x) + ret float %elt.atan2 +} + +define noundef half @atan2_half(half noundef %y, half noundef %x) { +entry: +; CHECK: [[DIV:%.+]] = fdiv half %y, %x +; EXPCHECK: [[ATAN:%.+]] = call half @llvm.atan.f16(half [[DIV]]) +; DOPCHECK: [[ATAN:%.+]] = call half @dx.op.unary.f16(i32 17, half [[DIV]]) +; CHECK-DAG: [[ADD_PI:%.+]] = fadd half [[ATAN]], 0xH4248 +; CHECK-DAG: [[SUB_PI:%.+]] = fsub half [[ATAN]], 0xH4248 +; CHECK-DAG: [[X_LT_0:%.+]] = fcmp olt half %x, 0xH0000 +; CHECK-DAG: [[X_EQ_0:%.+]] = fcmp oeq half %x, 0xH0000 +; CHECK-DAG: [[Y_GE_0:%.+]] = fcmp oge half %y, 0xH0000 +; CHECK-DAG: [[Y_LT_0:%.+]] = fcmp olt half %y, 0xH0000 +; CHECK: [[XLT0_AND_YGE0:%.+]] = and i1 [[X_LT_0]], [[Y_GE_0]] +; CHECK: [[SELECT_ADD_PI:%.+]] = 
select i1 [[XLT0_AND_YGE0]], half [[ADD_PI]], half [[ATAN]] +; CHECK: [[XLT0_AND_YLT0:%.+]] = and i1 [[X_LT_0]], [[Y_LT_0]] +; CHECK: [[SELECT_SUB_PI:%.+]] = select i1 [[XLT0_AND_YLT0]], half [[SUB_PI]], half [[SELECT_ADD_PI]] +; CHECK: [[XEQ0_AND_YLT0:%.+]] = and i1 [[X_EQ_0]], [[Y_LT_0]] +; CHECK: [[SELECT_NEGHPI:%.+]] = select i1 [[XEQ0_AND_YLT0]], half 0xHBE48, half [[SELECT_SUB_PI]] +; CHECK: [[XEQ0_AND_YGE0:%.+]] = and i1 [[X_EQ_0]], [[Y_GE_0]] +; CHECK: [[SELECT_HPI:%.+]] = select i1 [[XEQ0_AND_YGE0]], half 0xH3E48, half [[SELECT_NEGHPI]] +; CHECK: ret half [[SELECT_HPI]] + %elt.atan2 = call half @llvm.atan2.f16(half %y, half %x) + ret half %elt.atan2 +} + +define noundef <4 x float> @atan2_float4(<4 x float> noundef %y, <4 x float> noundef %x) { +entry: +; Just Expansion, no scalarization or lowering: +; EXPCHECK: [[DIV:%.+]] = fdiv <4 x float> %y, %x +; EXPCHECK: [[ATAN:%.+]] = call <4 x float> @llvm.atan.v4f32(<4 x float> [[DIV]]) +; EXPCHECK-DAG: [[ADD_PI:%.+]] = fadd <4 x float> [[ATAN]], +; EXPCHECK-DAG: [[SUB_PI:%.+]] = fsub <4 x float> [[ATAN]], +; EXPCHECK-DAG: [[X_LT_0:%.+]] = fcmp olt <4 x float> %x, zeroinitializer +; EXPCHECK-DAG: [[X_EQ_0:%.+]] = fcmp oeq <4 x float> %x, zeroinitializer +; EXPCHECK-DAG: [[Y_GE_0:%.+]] = fcmp oge <4 x float> %y, zeroinitializer +; EXPCHECK-DAG: [[Y_LT_0:%.+]] = fcmp olt <4 x float> %y, zeroinitializer +; EXPCHECK: [[XLT0_AND_YGE0:%.+]] = and <4 x i1> [[X_LT_0]], [[Y_GE_0]] +; EXPCHECK: [[SELECT_ADD_PI:%.+]] = select <4 x i1> [[XLT0_AND_YGE0]], <4 x float> [[ADD_PI]], <4 x float> [[ATAN]] +; EXPCHECK: [[XLT0_AND_YLT0:%.+]] = and <4 x i1> [[X_LT_0]], [[Y_LT_0]] +; EXPCHECK: [[SELECT_SUB_PI:%.+]] = select <4 x i1> [[XLT0_AND_YLT0]], <4 x float> [[SUB_PI]], <4 x float> [[SELECT_ADD_PI]] +; EXPCHECK: [[XEQ0_AND_YLT0:%.+]] = and <4 x i1> [[X_EQ_0]], [[Y_LT_0]] +; EXPCHECK: [[SELECT_NEGHPI:%.+]] = select <4 x i1> [[XEQ0_AND_YLT0]], <4 x float> , <4 x float> [[SELECT_SUB_PI]] +; EXPCHECK: [[XEQ0_AND_YGE0:%.+]] = and <4 x i1> [[X_EQ_0]], [[Y_GE_0]] +; EXPCHECK: [[SELECT_HPI:%.+]] = select <4 x i1> [[XEQ0_AND_YGE0]], <4 x float> , <4 x float> [[SELECT_NEGHPI]] +; EXPCHECK: ret <4 x float> [[SELECT_HPI]] + +; Scalarization occurs after expansion, so atan scalarization is tested separately. +; Expansion, scalarization and lowering: +; Just make sure this expands to exactly 4 scalar DXIL atan (OpCode=17) calls. 
+; DOPCHECK-COUNT-4: call float @dx.op.unary.f32(i32 17, float %{{.*}}) +; DOPCHECK-NOT: call float @dx.op.unary.f32(i32 17, + + %elt.atan2 = call <4 x float> @llvm.atan2.v4f32(<4 x float> %y, <4 x float> %x) + ret <4 x float> %elt.atan2 +} + +declare half @llvm.atan2.f16(half, half) +declare float @llvm.atan2.f32(float, float) +declare <4 x float> @llvm.atan2.v4f32(<4 x float>, <4 x float>) diff --git a/llvm/test/CodeGen/DirectX/atan2_error.ll b/llvm/test/CodeGen/DirectX/atan2_error.ll new file mode 100644 index 0000000000000..5b3077f85f5d4 --- /dev/null +++ b/llvm/test/CodeGen/DirectX/atan2_error.ll @@ -0,0 +1,11 @@ +; RUN: not opt -S -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s 2>&1 | FileCheck %s + +; DXIL operation atan does not support double overload type +; CHECK: in function atan2_double +; CHECK-SAME: Cannot create ATan operation: Invalid overload type + +define noundef double @atan2_double(double noundef %a, double noundef %b) #0 { +entry: + %1 = call double @llvm.atan2.f64(double %a, double %b) + ret double %1 +} From b2180481ec2d12628c7d5a784ca4f015e971266c Mon Sep 17 00:00:00 2001 From: Vitaly Buka Date: Tue, 24 Sep 2024 21:11:13 -0700 Subject: [PATCH 085/212] [hwasan] Consider order of mapping copts (#109621) Flags "-hwasan-mapping-offset" and "-hwasan-mapping-offset-dynamic" are mutually exclusive, use the last one. --- .../Instrumentation/HWAddressSanitizer.cpp | 12 +++-- .../HWAddressSanitizer/mapping-override.ll | 50 +++++++++++++++++++ 2 files changed, 58 insertions(+), 4 deletions(-) create mode 100644 llvm/test/Instrumentation/HWAddressSanitizer/mapping-override.ll diff --git a/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp index 7d3db5c2c8d5c..669b63343e994 100644 --- a/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp @@ -1937,14 +1937,18 @@ void HWAddressSanitizer::ShadowMapping::init(Triple &TargetTriple, // Fuchsia is always PIE, which means that the beginning of the address // space is always available. SetFixed(0); - } else if (ClMappingOffset.getNumOccurrences() > 0) { - SetFixed(ClMappingOffset); } else if (ClEnableKhwasan || InstrumentWithCalls) { SetFixed(0); WithFrameRecord = false; - } else if (ClMappingOffsetDynamic.getNumOccurrences() > 0) { - Kind = ClMappingOffsetDynamic; } WithFrameRecord = optOr(ClFrameRecords, WithFrameRecord); + + // Apply the last of ClMappingOffset and ClMappingOffsetDynamic. 
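+  // (Illustration, not in the original change: given
+  //    -hwasan-mapping-offset=567 -hwasan-mapping-offset-dynamic=global
+  //  the later "global" wins, while the reverse order fixes the shadow
+  //  base at 567 -- see the FIXED-GLOBAL and GLOBAL-FIXED RUN lines in
+  //  the new mapping-override.ll test below.)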
+ Kind = optOr(ClMappingOffsetDynamic, Kind); + if (ClMappingOffset.getNumOccurrences() > 0 && + !(ClMappingOffsetDynamic.getNumOccurrences() > 0 && + ClMappingOffsetDynamic.getPosition() > ClMappingOffset.getPosition())) { + SetFixed(ClMappingOffset); + } } diff --git a/llvm/test/Instrumentation/HWAddressSanitizer/mapping-override.ll b/llvm/test/Instrumentation/HWAddressSanitizer/mapping-override.ll new file mode 100644 index 0000000000000..5cd23f3ebe2b0 --- /dev/null +++ b/llvm/test/Instrumentation/HWAddressSanitizer/mapping-override.ll @@ -0,0 +1,50 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2 + +; RUN: opt < %s -passes=hwasan -S | FileCheck %s +; RUN: opt < %s -passes=hwasan -hwasan-mapping-offset-dynamic=global -S | FileCheck %s --check-prefixes=GLOBAL +; RUN: opt < %s -passes=hwasan -hwasan-mapping-offset=567 -S | FileCheck %s --check-prefixes=FIXED +; RUN: opt < %s -passes=hwasan -hwasan-mapping-offset=567 -hwasan-mapping-offset-dynamic=global -S | FileCheck %s --check-prefixes=FIXED-GLOBAL +; RUN: opt < %s -passes=hwasan -hwasan-mapping-offset-dynamic=global -hwasan-mapping-offset=567 -S | FileCheck %s --check-prefixes=GLOBAL-FIXED + +target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" +target triple = "aarch64--linux-android" + +define i8 @test_load8(ptr %a) sanitize_hwaddress { +; CHECK-LABEL: define i8 @test_load8 +; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[DOTHWASAN_SHADOW:%.*]] = call ptr asm "", "=r,0"(ptr @__hwasan_shadow) +; CHECK-NEXT: call void @llvm.hwasan.check.memaccess(ptr [[DOTHWASAN_SHADOW]], ptr [[A]], i32 0) +; CHECK-NEXT: [[B:%.*]] = load i8, ptr [[A]], align 4 +; CHECK-NEXT: ret i8 [[B]] +; +; GLOBAL-LABEL: define i8 @test_load8 +; GLOBAL-SAME: (ptr [[A:%.*]]) #[[ATTR0:[0-9]+]] { +; GLOBAL-NEXT: [[TMP1:%.*]] = load ptr, ptr @__hwasan_shadow_memory_dynamic_address, align 8 +; GLOBAL-NEXT: call void @llvm.hwasan.check.memaccess(ptr [[TMP1]], ptr [[A]], i32 0) +; GLOBAL-NEXT: [[B:%.*]] = load i8, ptr [[A]], align 4 +; GLOBAL-NEXT: ret i8 [[B]] +; +; FIXED-LABEL: define i8 @test_load8 +; FIXED-SAME: (ptr [[A:%.*]]) #[[ATTR0:[0-9]+]] { +; FIXED-NEXT: [[DOTHWASAN_SHADOW:%.*]] = call ptr asm "", "=r,0"(ptr inttoptr (i64 567 to ptr)) +; FIXED-NEXT: call void @llvm.hwasan.check.memaccess(ptr [[DOTHWASAN_SHADOW]], ptr [[A]], i32 0) +; FIXED-NEXT: [[B:%.*]] = load i8, ptr [[A]], align 4 +; FIXED-NEXT: ret i8 [[B]] +; +; FIXED-GLOBAL-LABEL: define i8 @test_load8 +; FIXED-GLOBAL-SAME: (ptr [[A:%.*]]) #[[ATTR0:[0-9]+]] { +; FIXED-GLOBAL-NEXT: [[TMP1:%.*]] = load ptr, ptr @__hwasan_shadow_memory_dynamic_address, align 8 +; FIXED-GLOBAL-NEXT: call void @llvm.hwasan.check.memaccess(ptr [[TMP1]], ptr [[A]], i32 0) +; FIXED-GLOBAL-NEXT: [[B:%.*]] = load i8, ptr [[A]], align 4 +; FIXED-GLOBAL-NEXT: ret i8 [[B]] +; +; GLOBAL-FIXED-LABEL: define i8 @test_load8 +; GLOBAL-FIXED-SAME: (ptr [[A:%.*]]) #[[ATTR0:[0-9]+]] { +; GLOBAL-FIXED-NEXT: [[DOTHWASAN_SHADOW:%.*]] = call ptr asm "", "=r,0"(ptr inttoptr (i64 567 to ptr)) +; GLOBAL-FIXED-NEXT: call void @llvm.hwasan.check.memaccess(ptr [[DOTHWASAN_SHADOW]], ptr [[A]], i32 0) +; GLOBAL-FIXED-NEXT: [[B:%.*]] = load i8, ptr [[A]], align 4 +; GLOBAL-FIXED-NEXT: ret i8 [[B]] +; + %b = load i8, ptr %a, align 4 + ret i8 %b +} From 3b8c78a610451f250a57122cdac394c6c8a66189 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Tue, 24 Sep 2024 21:36:14 -0700 Subject: [PATCH 086/212] [RISCV] Enable f16 vget/vset/vcreate/vlmul_ext/vlmul_trunc/vundefined 
intrinsics with Zvfhmin. (#109889) These intrinsics don't produce any instructions so don't require Zvfh. This makes Zvfhmin consistent with Zvfbfmin. See also https://github.com/riscv-non-isa/rvv-intrinsic-doc/issues/351 --- clang/include/clang/Basic/riscv_vector.td | 44 ++++++++++++++----- .../non-policy/non-overloaded/vcreate.c | 2 +- .../non-policy/non-overloaded/vget.c | 2 +- .../non-policy/non-overloaded/vlmul_ext_v.c | 2 +- .../non-policy/non-overloaded/vlmul_trunc_v.c | 2 +- .../non-policy/non-overloaded/vset.c | 2 +- .../non-policy/non-overloaded/vundefined.c | 2 +- .../non-policy/overloaded/vget.c | 2 +- .../non-policy/overloaded/vlmul_ext_v.c | 2 +- .../non-policy/overloaded/vlmul_trunc_v.c | 2 +- .../non-policy/overloaded/vset.c | 2 +- 11 files changed, 44 insertions(+), 20 deletions(-) diff --git a/clang/include/clang/Basic/riscv_vector.td b/clang/include/clang/Basic/riscv_vector.td index 5ef9602433697..6e57e51793a71 100644 --- a/clang/include/clang/Basic/riscv_vector.td +++ b/clang/include/clang/Basic/riscv_vector.td @@ -361,7 +361,11 @@ multiclass RVVNonTupleVCreateBuiltin src_lmul_list> { defvar src_s = FixedVString.S; def vcreate # src_v # dst_v : RVVBuiltin; + "csilfd">; + let RequiredFeatures = ["Zvfhmin"] in + def vcreate_h # src_v # dst_v : RVVBuiltin; let RequiredFeatures = ["Zvfbfmin"] in def vcreate_bf16 # src_v # dst_v : RVVBuiltin; + def vundefined : RVVBuiltin<"v", "v", "csilfd">; + let RequiredFeatures = ["Zvfhmin"] in + def vundefined_h : RVVBuiltin<"v", "v", "x">; let RequiredFeatures = ["Zvfbfmin"] in def vundefined_bf16 : RVVBuiltin<"v", "v", "y">; def vundefined_u : RVVBuiltin<"Uv", "Uv", "csil">; @@ -2482,7 +2488,9 @@ let HasMasked = false, HasVL = false, IRName = "" in { foreach nf = NFList in { let NF = nf in { defvar T = "(Tuple:" # nf # ")"; - def : RVVBuiltin; + def : RVVBuiltin; + let RequiredFeatures = ["Zvfhmin"] in + def : RVVBuiltin; let RequiredFeatures = ["Zvfbfmin"] in def : RVVBuiltin; def : RVVBuiltin; @@ -2502,7 +2510,10 @@ let HasMasked = false, HasVL = false, IRName = "" in { foreach dst_lmul = ["(SFixedLog2LMUL:-3)", "(SFixedLog2LMUL:-2)", "(SFixedLog2LMUL:-1)", "(SFixedLog2LMUL:0)", "(SFixedLog2LMUL:1)", "(SFixedLog2LMUL:2)"] in { def vlmul_trunc # dst_lmul : RVVBuiltin<"v" # dst_lmul # "v", - dst_lmul # "vv", "csilxfd", dst_lmul # "v">; + dst_lmul # "vv", "csilfd", dst_lmul # "v">; + let RequiredFeatures = ["Zvfhmin"] in + def vlmul_trunc_h # dst_lmul : RVVBuiltin<"v" # dst_lmul # "v", + dst_lmul # "vv", "x", dst_lmul # "v">; let RequiredFeatures = ["Zvfbfmin"] in def vlmul_trunc_bf16 # dst_lmul : RVVBuiltin<"v" # dst_lmul # "v", dst_lmul # "vv", "y", dst_lmul # "v">; @@ -2523,7 +2534,10 @@ let HasMasked = false, HasVL = false, IRName = "" in { foreach dst_lmul = ["(LFixedLog2LMUL:-2)", "(LFixedLog2LMUL:-1)", "(LFixedLog2LMUL:-0)", "(LFixedLog2LMUL:1)", "(LFixedLog2LMUL:2)", "(LFixedLog2LMUL:3)"] in { def vlmul_ext # dst_lmul : RVVBuiltin<"v" # dst_lmul # "v", - dst_lmul # "vv", "csilxfd", dst_lmul # "v">; + dst_lmul # "vv", "csilfd", dst_lmul # "v">; + let RequiredFeatures = ["Zvfhmin"] in + def vlmul_ext_h # dst_lmul : RVVBuiltin<"v" # dst_lmul # "v", + dst_lmul # "vv", "x", dst_lmul # "v">; let RequiredFeatures = ["Zvfbfmin"] in def vlmul_ext_bf16 # dst_lmul : RVVBuiltin<"v" # dst_lmul # "v", dst_lmul # "vv", "y", dst_lmul # "v">; @@ -2555,14 +2569,18 @@ let HasMasked = false, HasVL = false, IRName = "" in { } }] in { foreach dst_lmul = ["(SFixedLog2LMUL:0)", "(SFixedLog2LMUL:1)", "(SFixedLog2LMUL:2)"] in { - def : RVVBuiltin<"v" # 
dst_lmul # "v", dst_lmul # "vvKz", "csilxfd", dst_lmul # "v">; + def : RVVBuiltin<"v" # dst_lmul # "v", dst_lmul # "vvKz", "csilfd", dst_lmul # "v">; + let RequiredFeatures = ["Zvfhmin"] in + def : RVVBuiltin<"v" # dst_lmul # "v", dst_lmul # "vvKz", "x", dst_lmul # "v">; let RequiredFeatures = ["Zvfbfmin"] in def : RVVBuiltin<"v" # dst_lmul # "v", dst_lmul # "vvKz", "y", dst_lmul # "v">; def : RVVBuiltin<"Uv" # dst_lmul # "Uv", dst_lmul # "UvUvKz", "csil", dst_lmul # "Uv">; } foreach nf = NFList in { defvar T = "(Tuple:" # nf # ")"; - def : RVVBuiltin; + def : RVVBuiltin; + let RequiredFeatures = ["Zvfhmin"] in + def : RVVBuiltin; let RequiredFeatures = ["Zvfbfmin"] in def : RVVBuiltin; def : RVVBuiltin; @@ -2592,14 +2610,18 @@ let HasMasked = false, HasVL = false, IRName = "" in { } }] in { foreach dst_lmul = ["(LFixedLog2LMUL:1)", "(LFixedLog2LMUL:2)", "(LFixedLog2LMUL:3)"] in { - def : RVVBuiltin<"v" # dst_lmul # "v", dst_lmul # "v" # dst_lmul # "vKzv", "csilxfd">; + def : RVVBuiltin<"v" # dst_lmul # "v", dst_lmul # "v" # dst_lmul # "vKzv", "csilfd">; + let RequiredFeatures = ["Zvfhmin"] in + def : RVVBuiltin<"v" # dst_lmul # "v", dst_lmul # "v" # dst_lmul # "vKzv", "x">; let RequiredFeatures = ["Zvfbfmin"] in def : RVVBuiltin<"v" # dst_lmul # "v", dst_lmul # "v" # dst_lmul # "vKzv", "y">; def : RVVBuiltin<"Uv" # dst_lmul # "Uv", dst_lmul # "Uv" # dst_lmul #"UvKzUv", "csil">; } foreach nf = NFList in { defvar T = "(Tuple:" # nf # ")"; - def : RVVBuiltin<"v" # T # "v", T # "v" # T # "vKzv", "csilxfd">; + def : RVVBuiltin<"v" # T # "v", T # "v" # T # "vKzv", "csilfd">; + let RequiredFeatures = ["Zvfhmin"] in + def : RVVBuiltin<"v" # T # "v", T # "v" # T # "vKzv", "x">; let RequiredFeatures = ["Zvfbfmin"] in def : RVVBuiltin<"v" # T # "v", T # "v" # T # "vKzv", "y">; def : RVVBuiltin<"Uv" # T # "Uv", T # "Uv" # T # "UvKzUv", "csil">; @@ -2646,7 +2668,9 @@ let HasMasked = false, HasVL = false, IRName = "" in { defvar T = "(Tuple:" # nf # ")"; defvar V = VString.S; defvar UV = VString.S; - def : RVVBuiltin; + def : RVVBuiltin; + let RequiredFeatures = ["Zvfhmin"] in + def : RVVBuiltin; let RequiredFeatures = ["Zvfbfmin"] in def : RVVBuiltin; def : RVVBuiltin; diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vcreate.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vcreate.c index 4cf8bbf6c61ee..e2d493979732b 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vcreate.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vcreate.c @@ -1,7 +1,7 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 3 // REQUIRES: riscv-registered-target // RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ -// RUN: -target-feature +zvfh -disable-O0-optnone \ +// RUN: -target-feature +zvfhmin -disable-O0-optnone \ // RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ // RUN: FileCheck --check-prefix=CHECK-RV64 %s diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vget.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vget.c index a0e6555c03913..a1ddfc3a92c80 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vget.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vget.c @@ -1,7 +1,7 @@ // NOTE: Assertions have been autogenerated by 
utils/update_cc_test_checks.py UTC_ARGS: --version 2 // REQUIRES: riscv-registered-target // RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ -// RUN: -target-feature +zvfh -disable-O0-optnone \ +// RUN: -target-feature +zvfhmin -disable-O0-optnone \ // RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ // RUN: FileCheck --check-prefix=CHECK-RV64 %s diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vlmul_ext_v.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vlmul_ext_v.c index e6287775ed419..69471904720f8 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vlmul_ext_v.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vlmul_ext_v.c @@ -1,7 +1,7 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 2 // REQUIRES: riscv-registered-target // RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ -// RUN: -target-feature +zvfh -disable-O0-optnone \ +// RUN: -target-feature +zvfhmin -disable-O0-optnone \ // RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ // RUN: FileCheck --check-prefix=CHECK-RV64 %s diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vlmul_trunc_v.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vlmul_trunc_v.c index dea288bdf4328..a3e8ab87d06a8 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vlmul_trunc_v.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vlmul_trunc_v.c @@ -1,7 +1,7 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 2 // REQUIRES: riscv-registered-target // RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ -// RUN: -target-feature +zvfh -disable-O0-optnone \ +// RUN: -target-feature +zvfhmin -disable-O0-optnone \ // RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ // RUN: FileCheck --check-prefix=CHECK-RV64 %s diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vset.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vset.c index 06ccd3125c083..88b00653c56eb 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vset.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vset.c @@ -1,7 +1,7 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 2 // REQUIRES: riscv-registered-target // RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ -// RUN: -target-feature +zvfh -disable-O0-optnone \ +// RUN: -target-feature +zvfhmin -disable-O0-optnone \ // RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ // RUN: FileCheck --check-prefix=CHECK-RV64 %s diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vundefined.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vundefined.c index 5950068181abf..f18b999e89232 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vundefined.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vundefined.c @@ -1,7 +1,7 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py 
UTC_ARGS: --version 3 // REQUIRES: riscv-registered-target // RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ -// RUN: -target-feature +zvfh -disable-O0-optnone \ +// RUN: -target-feature +zvfhmin -disable-O0-optnone \ // RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ // RUN: FileCheck --check-prefix=CHECK-RV64 %s diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/overloaded/vget.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/overloaded/vget.c index e156ec91bfd2c..afc9cff8dec04 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/overloaded/vget.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/overloaded/vget.c @@ -1,7 +1,7 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 2 // REQUIRES: riscv-registered-target // RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ -// RUN: -target-feature +zvfh -disable-O0-optnone \ +// RUN: -target-feature +zvfhmin -disable-O0-optnone \ // RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ // RUN: FileCheck --check-prefix=CHECK-RV64 %s diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/overloaded/vlmul_ext_v.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/overloaded/vlmul_ext_v.c index 92b894f1f5ef5..8a01f5ebdbcfe 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/overloaded/vlmul_ext_v.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/overloaded/vlmul_ext_v.c @@ -1,7 +1,7 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 2 // REQUIRES: riscv-registered-target // RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ -// RUN: -target-feature +zvfh -disable-O0-optnone \ +// RUN: -target-feature +zvfhmin -disable-O0-optnone \ // RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ // RUN: FileCheck --check-prefix=CHECK-RV64 %s diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/overloaded/vlmul_trunc_v.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/overloaded/vlmul_trunc_v.c index fa923d87bd1ba..18f6901073a1e 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/overloaded/vlmul_trunc_v.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/overloaded/vlmul_trunc_v.c @@ -1,7 +1,7 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 2 // REQUIRES: riscv-registered-target // RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zfh \ -// RUN: -target-feature +zvfh -disable-O0-optnone \ +// RUN: -target-feature +zvfhmin -disable-O0-optnone \ // RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ // RUN: FileCheck --check-prefix=CHECK-RV64 %s diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/overloaded/vset.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/overloaded/vset.c index cc5a32878bd90..b63fa52fa3039 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/overloaded/vset.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/overloaded/vset.c @@ -1,7 +1,7 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 2 // REQUIRES: riscv-registered-target // RUN: %clang_cc1 -triple riscv64 -target-feature +v 
-target-feature +zfh \ -// RUN: -target-feature +zvfh -disable-O0-optnone \ +// RUN: -target-feature +zvfhmin -disable-O0-optnone \ // RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ // RUN: FileCheck --check-prefix=CHECK-RV64 %s From d0878f13dffa406a267eb8d0b64f803951e997ea Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Tue, 24 Sep 2024 21:36:55 -0700 Subject: [PATCH 087/212] [RISCV] Use RVVBitsPerBlock in assignRVVStackObjectOffsets and adjustReg. NFC (#109848) I think the 8 here represents RVVBitsPerBlock / 8. --- llvm/lib/Target/RISCV/RISCVFrameLowering.cpp | 7 ++++--- llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp | 6 +++--- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp index 7abd5a49a1b5f..22824b77c37dd 100644 --- a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp @@ -1090,11 +1090,12 @@ RISCVFrameLowering::assignRVVStackObjectOffsets(MachineFunction &MF) const { for (int FI : ObjectsToAllocate) { // ObjectSize in bytes. int64_t ObjectSize = MFI.getObjectSize(FI); - auto ObjectAlign = std::max(Align(8), MFI.getObjectAlign(FI)); + auto ObjectAlign = + std::max(Align(RISCV::RVVBitsPerBlock / 8), MFI.getObjectAlign(FI)); // If the data type is the fractional vector type, reserve one vector // register for it. - if (ObjectSize < 8) - ObjectSize = 8; + if (ObjectSize < (RISCV::RVVBitsPerBlock / 8)) + ObjectSize = (RISCV::RVVBitsPerBlock / 8); Offset = alignTo(Offset + ObjectSize, ObjectAlign); MFI.setObjectOffset(FI, -Offset); // Update the maximum alignment of the RVV stack section diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp index 701594c0fb05d..91d539a355ac2 100644 --- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp @@ -200,11 +200,11 @@ void RISCVRegisterInfo::adjustReg(MachineBasicBlock &MBB, ScratchReg = MRI.createVirtualRegister(&RISCV::GPRRegClass); assert(ScalableValue > 0 && "There is no need to get VLEN scaled value."); - assert(ScalableValue % 8 == 0 && + assert(ScalableValue % (RISCV::RVVBitsPerBlock / 8) == 0 && "Reserve the stack by the multiple of one vector size."); - assert(isInt<32>(ScalableValue / 8) && + assert(isInt<32>(ScalableValue / (RISCV::RVVBitsPerBlock / 8)) && "Expect the number of vector registers within 32-bits."); - uint32_t NumOfVReg = ScalableValue / 8; + uint32_t NumOfVReg = ScalableValue / (RISCV::RVVBitsPerBlock / 8); BuildMI(MBB, II, DL, TII->get(RISCV::PseudoReadVLENB), ScratchReg) .setMIFlag(Flag); From 74d9f7ce80e8d729c18f12bde73bdd8ea750b369 Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Tue, 24 Sep 2024 21:49:06 -0700 Subject: [PATCH 088/212] [llvm] Use std::optional::value_or (NFC) (#109890) --- llvm/include/llvm/Frontend/OpenMP/ConstructDecompositionT.h | 3 +-- llvm/tools/llvm-cov/SourceCoverageViewText.cpp | 4 ++-- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/llvm/include/llvm/Frontend/OpenMP/ConstructDecompositionT.h b/llvm/include/llvm/Frontend/OpenMP/ConstructDecompositionT.h index b93bc594a82bf..4bdfa1cf4c149 100644 --- a/llvm/include/llvm/Frontend/OpenMP/ConstructDecompositionT.h +++ b/llvm/include/llvm/Frontend/OpenMP/ConstructDecompositionT.h @@ -916,8 +916,7 @@ bool ConstructDecompositionT::applyClause( /*ReductionIdentifiers=*/std::get(clause.t), /*List=*/objects}}); - ReductionModifier effective = - modifier.has_value() ? 
*modifier : ReductionModifier::Default; + ReductionModifier effective = modifier.value_or(ReductionModifier::Default); bool effectiveApplied = false; // Walk over the leaf constructs starting from the innermost, and apply // the clause as required by the spec. diff --git a/llvm/tools/llvm-cov/SourceCoverageViewText.cpp b/llvm/tools/llvm-cov/SourceCoverageViewText.cpp index cab60c2d9034e..8b93b592910b3 100644 --- a/llvm/tools/llvm-cov/SourceCoverageViewText.cpp +++ b/llvm/tools/llvm-cov/SourceCoverageViewText.cpp @@ -179,7 +179,7 @@ void SourceCoverageViewText::renderLine(raw_ostream &OS, LineRef L, unsigned Col = 1; for (const auto *S : Segments) { unsigned End = std::min(S->Col, static_cast(Line.size()) + 1); - colored_ostream(OS, Highlight ? *Highlight : raw_ostream::SAVEDCOLOR, + colored_ostream(OS, Highlight.value_or(raw_ostream::SAVEDCOLOR), getOptions().Colors && Highlight, /*Bold=*/false, /*BG=*/true) << Line.substr(Col - 1, End - Col); @@ -196,7 +196,7 @@ void SourceCoverageViewText::renderLine(raw_ostream &OS, LineRef L, } // Show the rest of the line. - colored_ostream(OS, Highlight ? *Highlight : raw_ostream::SAVEDCOLOR, + colored_ostream(OS, Highlight.value_or(raw_ostream::SAVEDCOLOR), getOptions().Colors && Highlight, /*Bold=*/false, /*BG=*/true) << Line.substr(Col - 1, Line.size() - Col + 1); OS << '\n'; From c92137e474d504159bd9d51f125c8a9ba9141109 Mon Sep 17 00:00:00 2001 From: Rahul Joshi Date: Tue, 24 Sep 2024 22:05:51 -0700 Subject: [PATCH 089/212] [NFC][TableGen] Adopt scaled indent in PredicateExpander (#109801) Adopt scaled indent in PredicateExpander. Added pre/post inc/dec operators to `indent` and related unit tests. Verified by comparing *.inc files generated by LLVM build with/without the change. --- llvm/include/llvm/Support/raw_ostream.h | 24 ++++ llvm/unittests/Support/raw_ostream_test.cpp | 20 ++++ .../TableGen/Common/PredicateExpander.cpp | 107 +++++++----------- .../utils/TableGen/Common/PredicateExpander.h | 19 ++-- llvm/utils/TableGen/InstrInfoEmitter.cpp | 4 +- .../TableGen/MacroFusionPredicatorEmitter.cpp | 4 +- llvm/utils/TableGen/SubtargetEmitter.cpp | 20 ++-- 7 files changed, 106 insertions(+), 92 deletions(-) diff --git a/llvm/include/llvm/Support/raw_ostream.h b/llvm/include/llvm/Support/raw_ostream.h index c2f2299ed9645..d3b411590e7fd 100644 --- a/llvm/include/llvm/Support/raw_ostream.h +++ b/llvm/include/llvm/Support/raw_ostream.h @@ -797,6 +797,30 @@ struct indent { assert(NumIndents >= N && "Indentation undeflow"); return indent(NumIndents - N, Scale); } + indent &operator++() { // Prefix ++. + ++NumIndents; + return *this; + } + indent operator++(int) { // Postfix ++. + indent Old = *this; + ++NumIndents; + return Old; + } + indent &operator--() { // Prefix --. + assert(NumIndents >= 1); + --NumIndents; + return *this; + } + indent operator--(int) { // Postfix --. + indent Old = *this; + assert(NumIndents >= 1); + --NumIndents; + return Old; + } + indent &operator=(unsigned N) { + NumIndents = N; + return *this; + } }; inline raw_ostream &operator<<(raw_ostream &OS, const indent &Indent) { diff --git a/llvm/unittests/Support/raw_ostream_test.cpp b/llvm/unittests/Support/raw_ostream_test.cpp index a35edd6168529..fbeff37d26a35 100644 --- a/llvm/unittests/Support/raw_ostream_test.cpp +++ b/llvm/unittests/Support/raw_ostream_test.cpp @@ -198,6 +198,26 @@ TEST(raw_ostreamTest, Indent) { EXPECT_EQ(Spaces(10), printToString(Scaled)); Scaled -= 1; EXPECT_EQ(Spaces(8), printToString(Scaled)); + + // Operators. 
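+  // Postfix ++/-- hand back the previous indent value; the prefix forms
+  // return the updated one.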
+ Indent = 10; + EXPECT_EQ(Spaces(10), printToString(Indent)); + + indent Temp = Indent++; + EXPECT_EQ(Spaces(11), printToString(Indent)); + EXPECT_EQ(Spaces(10), printToString(Temp)); + + Temp = Indent--; + EXPECT_EQ(Spaces(10), printToString(Indent)); + EXPECT_EQ(Spaces(11), printToString(Temp)); + + Temp = ++Indent; + EXPECT_EQ(Spaces(11), printToString(Indent)); + EXPECT_EQ(Spaces(11), printToString(Temp)); + + Temp = --Indent; + EXPECT_EQ(Spaces(10), printToString(Indent)); + EXPECT_EQ(Spaces(10), printToString(Temp)); } TEST(raw_ostreamTest, FormatHex) { diff --git a/llvm/utils/TableGen/Common/PredicateExpander.cpp b/llvm/utils/TableGen/Common/PredicateExpander.cpp index 2afaa8cc21aa6..314e563ba90bb 100644 --- a/llvm/utils/TableGen/Common/PredicateExpander.cpp +++ b/llvm/utils/TableGen/Common/PredicateExpander.cpp @@ -153,10 +153,9 @@ void PredicateExpander::expandCheckOpcode(raw_ostream &OS, } OS << '('; - increaseIndentLevel(); + ++Indent; for (const Record *Rec : Opcodes) { - OS << '\n'; - OS.indent(getIndentLevel() * 2); + OS << '\n' << Indent; if (!First) OS << (shouldNegate() ? "&& " : "|| "); @@ -164,10 +163,8 @@ void PredicateExpander::expandCheckOpcode(raw_ostream &OS, First = false; } - OS << '\n'; - decreaseIndentLevel(); - OS.indent(getIndentLevel() * 2); - OS << ')'; + --Indent; + OS << '\n' << Indent << ')'; } void PredicateExpander::expandCheckPseudo(raw_ostream &OS, @@ -187,22 +184,19 @@ void PredicateExpander::expandPredicateSequence( // Okay, there is more than one predicate in the set. bool First = true; OS << (shouldNegate() ? "!(" : "("); - increaseIndentLevel(); + ++Indent; bool OldValue = shouldNegate(); setNegatePredicate(false); for (const Record *Rec : Sequence) { - OS << '\n'; - OS.indent(getIndentLevel() * 2); + OS << '\n' << Indent; if (!First) OS << (IsCheckAll ? "&& " : "|| "); expandPredicate(OS, Rec); First = false; } - OS << '\n'; - decreaseIndentLevel(); - OS.indent(getIndentLevel() * 2); - OS << ')'; + --Indent; + OS << '\n' << Indent << ')'; setNegatePredicate(OldValue); } @@ -269,15 +263,14 @@ void PredicateExpander::expandReturnStatement(raw_ostream &OS, void PredicateExpander::expandOpcodeSwitchCase(raw_ostream &OS, const Record *Rec) { for (const Record *Opcode : Rec->getValueAsListOfDefs("Opcodes")) { - OS.indent(getIndentLevel() * 2); - OS << "case " << Opcode->getValueAsString("Namespace") + OS << Indent << "case " << Opcode->getValueAsString("Namespace") << "::" << Opcode->getName() << ":\n"; } - increaseIndentLevel(); - OS.indent(getIndentLevel() * 2); + ++Indent; + OS << Indent; expandStatement(OS, Rec->getValueAsDef("CaseStmt")); - decreaseIndentLevel(); + --Indent; } void PredicateExpander::expandOpcodeSwitchStatement( @@ -292,17 +285,12 @@ void PredicateExpander::expandOpcodeSwitchStatement( } // Expand the default case. 
- SS.indent(getIndentLevel() * 2); - SS << "default:\n"; + SS << Indent << "default:\n"; - increaseIndentLevel(); - SS.indent(getIndentLevel() * 2); + ++Indent; + SS << Indent; expandStatement(SS, Default); - decreaseIndentLevel(); - SS << '\n'; - - SS.indent(getIndentLevel() * 2); - SS << "} // end of switch-stmt"; + SS << '\n' << Indent << "} // end of switch-stmt"; OS << Buffer; } @@ -436,8 +424,7 @@ void STIPredicateExpander::expandHeader(raw_ostream &OS, const Record *Rec = Fn.getDeclaration(); StringRef FunctionName = Rec->getValueAsString("Name"); - OS.indent(getIndentLevel() * 2); - OS << "bool "; + OS << Indent << "bool "; if (shouldExpandDefinition()) OS << getClassPrefix() << "::"; OS << FunctionName << "("; @@ -463,26 +450,22 @@ void STIPredicateExpander::expandPrologue(raw_ostream &OS, bool UpdatesOpcodeMask = Fn.getDeclaration()->getValueAsBit("UpdatesOpcodeMask"); - increaseIndentLevel(); - unsigned IndentLevel = getIndentLevel(); + ++Indent; for (const Record *Delegate : Fn.getDeclaration()->getValueAsListOfDefs("Delegates")) { - OS.indent(IndentLevel * 2); - OS << "if (" << Delegate->getValueAsString("Name") << "(MI"; + OS << Indent << "if (" << Delegate->getValueAsString("Name") << "(MI"; if (UpdatesOpcodeMask) OS << ", Mask"; if (shouldExpandForMC()) OS << ", ProcessorID"; OS << "))\n"; - OS.indent((1 + IndentLevel) * 2); - OS << "return true;\n\n"; + OS << Indent + 1 << "return true;\n\n"; } if (shouldExpandForMC()) return; - OS.indent(IndentLevel * 2); - OS << "unsigned ProcessorID = getSchedModel().getProcessorID();\n"; + OS << Indent << "unsigned ProcessorID = getSchedModel().getProcessorID();\n"; } void STIPredicateExpander::expandOpcodeGroup(raw_ostream &OS, @@ -497,8 +480,7 @@ void STIPredicateExpander::expandOpcodeGroup(raw_ostream &OS, continue; if (FirstProcID) { - OS.indent(getIndentLevel() * 2); - OS << "if (ProcessorID == " << I; + OS << Indent << "if (ProcessorID == " << I; } else { OS << " || ProcessorID == " << I; } @@ -507,21 +489,20 @@ void STIPredicateExpander::expandOpcodeGroup(raw_ostream &OS, OS << ") {\n"; - increaseIndentLevel(); - OS.indent(getIndentLevel() * 2); + ++Indent; + OS << Indent; if (ShouldUpdateOpcodeMask) { if (PI.OperandMask.isZero()) OS << "Mask.clearAllBits();\n"; else OS << "Mask = " << PI.OperandMask << ";\n"; - OS.indent(getIndentLevel() * 2); + OS << Indent; } OS << "return "; expandPredicate(OS, PI.Predicate); OS << ";\n"; - decreaseIndentLevel(); - OS.indent(getIndentLevel() * 2); - OS << "}\n"; + --Indent; + OS << Indent << "}\n"; } } @@ -530,46 +511,38 @@ void STIPredicateExpander::expandBody(raw_ostream &OS, bool UpdatesOpcodeMask = Fn.getDeclaration()->getValueAsBit("UpdatesOpcodeMask"); - unsigned IndentLevel = getIndentLevel(); - OS.indent(IndentLevel * 2); - OS << "switch(MI" << (isByRef() ? "." : "->") << "getOpcode()) {\n"; - OS.indent(IndentLevel * 2); - OS << "default:\n"; - OS.indent(IndentLevel * 2); - OS << " break;"; + OS << Indent << "switch(MI" << (isByRef() ? "." 
: "->") << "getOpcode()) {\n"; + OS << Indent << "default:\n"; + OS << Indent << " break;"; for (const OpcodeGroup &Group : Fn.getGroups()) { for (const Record *Opcode : Group.getOpcodes()) { - OS << '\n'; - OS.indent(IndentLevel * 2); - OS << "case " << getTargetName() << "::" << Opcode->getName() << ":"; + OS << '\n' + << Indent << "case " << getTargetName() << "::" << Opcode->getName() + << ":"; } OS << '\n'; - increaseIndentLevel(); + ++Indent; expandOpcodeGroup(OS, Group, UpdatesOpcodeMask); - OS.indent(getIndentLevel() * 2); - OS << "break;\n"; - decreaseIndentLevel(); + OS << Indent << "break;\n"; + --Indent; } - OS.indent(IndentLevel * 2); - OS << "}\n"; + OS << Indent << "}\n"; } void STIPredicateExpander::expandEpilogue(raw_ostream &OS, const STIPredicateFunction &Fn) { - OS << '\n'; - OS.indent(getIndentLevel() * 2); + OS << '\n' << Indent; OS << "return "; expandPredicate(OS, Fn.getDefaultReturnPredicate()); OS << ";\n"; - decreaseIndentLevel(); - OS.indent(getIndentLevel() * 2); + --Indent; StringRef FunctionName = Fn.getDeclaration()->getValueAsString("Name"); - OS << "} // " << ClassPrefix << "::" << FunctionName << "\n\n"; + OS << Indent << "} // " << ClassPrefix << "::" << FunctionName << "\n\n"; } void STIPredicateExpander::expandSTIPredicate(raw_ostream &OS, diff --git a/llvm/utils/TableGen/Common/PredicateExpander.h b/llvm/utils/TableGen/Common/PredicateExpander.h index c0cd69e3cb1f8..0c3a8718a473f 100644 --- a/llvm/utils/TableGen/Common/PredicateExpander.h +++ b/llvm/utils/TableGen/Common/PredicateExpander.h @@ -18,39 +18,38 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/StringRef.h" +#include "llvm/Support/raw_ostream.h" namespace llvm { -class raw_ostream; class Record; class PredicateExpander { bool EmitCallsByRef; bool NegatePredicate; bool ExpandForMC; - unsigned IndentLevel; StringRef TargetName; PredicateExpander(const PredicateExpander &) = delete; PredicateExpander &operator=(const PredicateExpander &) = delete; +protected: + indent Indent; + public: - PredicateExpander(StringRef Target) + explicit PredicateExpander(StringRef Target, unsigned Indent = 1) : EmitCallsByRef(true), NegatePredicate(false), ExpandForMC(false), - IndentLevel(1U), TargetName(Target) {} + TargetName(Target), Indent(Indent, 2) {} bool isByRef() const { return EmitCallsByRef; } bool shouldNegate() const { return NegatePredicate; } bool shouldExpandForMC() const { return ExpandForMC; } - unsigned getIndentLevel() const { return IndentLevel; } + indent &getIndent() { return Indent; } StringRef getTargetName() const { return TargetName; } void setByRef(bool Value) { EmitCallsByRef = Value; } void flipNegatePredicate() { NegatePredicate = !NegatePredicate; } void setNegatePredicate(bool Value) { NegatePredicate = Value; } void setExpandForMC(bool Value) { ExpandForMC = Value; } - void setIndentLevel(unsigned Level) { IndentLevel = Level; } - void increaseIndentLevel() { ++IndentLevel; } - void decreaseIndentLevel() { --IndentLevel; } void expandTrue(raw_ostream &OS); void expandFalse(raw_ostream &OS); @@ -116,8 +115,8 @@ class STIPredicateExpander : public PredicateExpander { void expandEpilogue(raw_ostream &OS, const STIPredicateFunction &Fn); public: - STIPredicateExpander(StringRef Target) - : PredicateExpander(Target), ExpandDefinition(false) {} + explicit STIPredicateExpander(StringRef Target, unsigned Indent = 1) + : PredicateExpander(Target, Indent), ExpandDefinition(false) {} bool shouldExpandDefinition() const { return ExpandDefinition; } StringRef getClassPrefix() const { 
return ClassPrefix; } diff --git a/llvm/utils/TableGen/InstrInfoEmitter.cpp b/llvm/utils/TableGen/InstrInfoEmitter.cpp index cc5ef49385bb8..46605095ba85f 100644 --- a/llvm/utils/TableGen/InstrInfoEmitter.cpp +++ b/llvm/utils/TableGen/InstrInfoEmitter.cpp @@ -711,7 +711,7 @@ void InstrInfoEmitter::emitMCIIHelperMethods(raw_ostream &OS, OS << "bool " << Rec->getValueAsString("FunctionName"); OS << "(const MCInst &MI) {\n"; - OS.indent(PE.getIndentLevel() * 2); + OS << PE.getIndent(); PE.expandStatement(OS, Rec->getValueAsDef("Body")); OS << "\n}\n\n"; } @@ -914,7 +914,7 @@ void InstrInfoEmitter::emitTIIHelperMethods(raw_ostream &OS, } OS << " {\n"; - OS.indent(PE.getIndentLevel() * 2); + OS << PE.getIndent(); PE.expandStatement(OS, Rec->getValueAsDef("Body")); OS << "\n}\n\n"; } diff --git a/llvm/utils/TableGen/MacroFusionPredicatorEmitter.cpp b/llvm/utils/TableGen/MacroFusionPredicatorEmitter.cpp index c4f238b67476a..6ca2fea41230b 100644 --- a/llvm/utils/TableGen/MacroFusionPredicatorEmitter.cpp +++ b/llvm/utils/TableGen/MacroFusionPredicatorEmitter.cpp @@ -160,7 +160,7 @@ void MacroFusionPredicatorEmitter::emitFirstPredicate(const Record *Predicate, OS.indent(4) << "const MachineInstr *MI = FirstMI;\n"; OS.indent(4) << "if ("; PE.setNegatePredicate(true); - PE.setIndentLevel(3); + PE.getIndent() = 3; PE.expandPredicate(OS, Predicate->getValueAsDef("Predicate")); OS << ")\n"; OS.indent(4) << " return false;\n"; @@ -181,7 +181,7 @@ void MacroFusionPredicatorEmitter::emitSecondPredicate(const Record *Predicate, OS.indent(4) << "const MachineInstr *MI = &SecondMI;\n"; OS.indent(4) << "if ("; PE.setNegatePredicate(true); - PE.setIndentLevel(3); + PE.getIndent() = 3; PE.expandPredicate(OS, Predicate->getValueAsDef("Predicate")); OS << ")\n"; OS.indent(4) << " return false;\n"; diff --git a/llvm/utils/TableGen/SubtargetEmitter.cpp b/llvm/utils/TableGen/SubtargetEmitter.cpp index 78d80ff82d6a4..d21e19c060afc 100644 --- a/llvm/utils/TableGen/SubtargetEmitter.cpp +++ b/llvm/utils/TableGen/SubtargetEmitter.cpp @@ -1576,13 +1576,13 @@ static void emitPredicates(const CodeGenSchedTransition &T, unsigned NumNonTruePreds = T.PredTerm.size() - count_if(T.PredTerm, isTruePredicate); - SS.indent(PE.getIndentLevel() * 2); + SS << PE.getIndent(); if (NumNonTruePreds) { bool FirstNonTruePredicate = true; SS << "if ("; - PE.setIndentLevel(PE.getIndentLevel() + 2); + PE.getIndent() += 2; for (const Record *Rec : T.PredTerm) { // Skip predicates that evaluate to "true". 
@@ -1593,7 +1593,7 @@ static void emitPredicates(const CodeGenSchedTransition &T,
         FirstNonTruePredicate = false;
       } else {
         SS << "\n";
-        SS.indent(PE.getIndentLevel() * 2);
+        SS << PE.getIndent();
         SS << "&& ";
       }
 
@@ -1610,9 +1610,9 @@ static void emitPredicates(const CodeGenSchedTransition &T,
     }
 
     SS << ")\n"; // end of if-stmt
-    PE.decreaseIndentLevel();
-    SS.indent(PE.getIndentLevel() * 2);
-    PE.decreaseIndentLevel();
+    --PE.getIndent();
+    SS << PE.getIndent();
+    --PE.getIndent();
   }
 
   SS << "return " << T.ToClassIdx << "; // " << SC.Name << '\n';
@@ -1736,7 +1736,7 @@ void SubtargetEmitter::emitSchedModelHelpersImpl(
       FinalT = &T;
       continue;
     }
-    PE.setIndentLevel(3);
+    PE.getIndent() = 3;
     emitPredicates(T, SchedModels.getSchedClass(T.ToClassIdx), PE, OS);
   }
   if (FinalT)
@@ -1780,11 +1780,10 @@ void SubtargetEmitter::EmitSchedModelHelpers(const std::string &ClassName,
      << "::resolveVariantSchedClassImpl(SchedClass, MI, MCII, CPUID);\n"
      << "} // " << ClassName << "::resolveVariantSchedClass\n\n";
 
-  STIPredicateExpander PE(Target);
+  STIPredicateExpander PE(Target, /*Indent=*/0);
   PE.setClassPrefix(ClassName);
   PE.setExpandDefinition(true);
   PE.setByRef(false);
-  PE.setIndentLevel(0);
 
   for (const STIPredicateFunction &Fn : SchedModels.getSTIPredicates())
     PE.expandSTIPredicate(OS, Fn);
@@ -1962,7 +1961,7 @@ void SubtargetEmitter::EmitMCInstrAnalysisPredicateFunctions(raw_ostream &OS) {
   OS << "\n#ifdef GET_STIPREDICATE_DECLS_FOR_MC_ANALYSIS\n";
   OS << "#undef GET_STIPREDICATE_DECLS_FOR_MC_ANALYSIS\n\n";
 
-  STIPredicateExpander PE(Target);
+  STIPredicateExpander PE(Target, /*Indent=*/0);
   PE.setExpandForMC(true);
   PE.setByRef(true);
   for (const STIPredicateFunction &Fn : SchedModels.getSTIPredicates())
     PE.expandSTIPredicate(OS, Fn);
@@ -1976,7 +1975,7 @@ void SubtargetEmitter::EmitMCInstrAnalysisPredicateFunctions(raw_ostream &OS) {
   std::string ClassPrefix = Target + "MCInstrAnalysis";
   PE.setExpandDefinition(true);
   PE.setClassPrefix(ClassPrefix);
-  PE.setIndentLevel(0);
 
   for (const STIPredicateFunction &Fn : SchedModels.getSTIPredicates())
     PE.expandSTIPredicate(OS, Fn);

From 614aeda93b2225c6eb42b00ba189ba7ca2585c60 Mon Sep 17 00:00:00 2001
From: Alex Bradbury
Date: Wed, 25 Sep 2024 06:14:43 +0100
Subject: [PATCH 090/212] [RISCV] Mark Zacas as non-experimental (#109651)

The extension has been ratified for some time, but we kept it
experimental (see #99898) due to an unresolved ABI concern. That ABI
issue has been resolved by #101023, so I believe there's no known
barrier to moving Zacas to non-experimental.
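As a rough illustration (a sketch, not part of this patch), users now
spell the feature without the experimental prefix:

  llc -mtriple=riscv64 -mattr=+a,+zacas -verify-machineinstrs test.ll

where test.ll could be as small as

  define i32 @cas(ptr %p, i32 %cmp, i32 %val) {
    %pair = cmpxchg ptr %p, i32 %cmp, i32 %val seq_cst seq_cst
    %old = extractvalue { i32, i1 } %pair, 0
    ret i32 %old
  }

and, per the documentation updated below, an i32 cmpxchg like this can
select amocas.w. The file and function names above are made up for
illustration only.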
--- .../Driver/print-supported-extensions-riscv.c | 2 +- .../test/Preprocessor/riscv-target-features.c | 18 +++++++++--------- llvm/docs/RISCVUsage.rst | 9 ++++++--- llvm/docs/ReleaseNotes.rst | 1 + llvm/lib/Target/RISCV/RISCVFeatures.td | 4 ++-- llvm/lib/TargetParser/Host.cpp | 3 +-- .../RISCV/atomic-cmpxchg-branch-on-result.ll | 6 +++--- llvm/test/CodeGen/RISCV/atomic-cmpxchg.ll | 12 ++++++------ llvm/test/CodeGen/RISCV/atomic-rmw.ll | 12 ++++++------ llvm/test/CodeGen/RISCV/atomic-signext.ll | 4 ++-- llvm/test/CodeGen/RISCV/attributes.ll | 4 ++-- llvm/test/MC/RISCV/rv32zacas-invalid.s | 2 +- llvm/test/MC/RISCV/rv32zacas-valid.s | 12 ++++++------ llvm/test/MC/RISCV/rv64zacas-invalid.s | 2 +- llvm/test/MC/RISCV/rv64zacas-valid.s | 6 +++--- llvm/test/MC/RISCV/rvzabha-zacas-valid.s | 12 ++++++------ .../TargetParser/RISCVISAInfoTest.cpp | 2 +- 17 files changed, 57 insertions(+), 54 deletions(-) diff --git a/clang/test/Driver/print-supported-extensions-riscv.c b/clang/test/Driver/print-supported-extensions-riscv.c index 312c462f715d5..a39c1ab36b1db 100644 --- a/clang/test/Driver/print-supported-extensions-riscv.c +++ b/clang/test/Driver/print-supported-extensions-riscv.c @@ -35,6 +35,7 @@ // CHECK-NEXT: za64rs 1.0 'Za64rs' (Reservation Set Size of at Most 64 Bytes) // CHECK-NEXT: zaamo 1.0 'Zaamo' (Atomic Memory Operations) // CHECK-NEXT: zabha 1.0 'Zabha' (Byte and Halfword Atomic Memory Operations) +// CHECK-NEXT: zacas 1.0 'Zacas' (Atomic Compare-And-Swap Instructions) // CHECK-NEXT: zalrsc 1.0 'Zalrsc' (Load-Reserved/Store-Conditional) // CHECK-NEXT: zama16b 1.0 'Zama16b' (Atomic 16-byte misaligned loads, stores and AMOs) // CHECK-NEXT: zawrs 1.0 'Zawrs' (Wait on Reservation Set) @@ -171,7 +172,6 @@ // CHECK-NEXT: Experimental extensions // CHECK-NEXT: zicfilp 1.0 'Zicfilp' (Landing pad) // CHECK-NEXT: zicfiss 1.0 'Zicfiss' (Shadow stack) -// CHECK-NEXT: zacas 1.0 'Zacas' (Atomic Compare-And-Swap Instructions) // CHECK-NEXT: zalasr 0.1 'Zalasr' (Load-Acquire and Store-Release Instructions) // CHECK-NEXT: zvbc32e 0.7 'Zvbc32e' (Vector Carryless Multiplication with 32-bits elements) // CHECK-NEXT: zvkgs 0.7 'Zvkgs' (Vector-Scalar GCM instructions for Cryptography) diff --git a/clang/test/Preprocessor/riscv-target-features.c b/clang/test/Preprocessor/riscv-target-features.c index 60675065495bb..05a8534ba13da 100644 --- a/clang/test/Preprocessor/riscv-target-features.c +++ b/clang/test/Preprocessor/riscv-target-features.c @@ -87,6 +87,7 @@ // CHECK-NOT: __riscv_za64rs {{.*$}} // CHECK-NOT: __riscv_zaamo {{.*$}} // CHECK-NOT: __riscv_zabha {{.*$}} +// CHECK-NOT: __riscv_zacas {{.*$}} // CHECK-NOT: __riscv_zalrsc {{.*$}} // CHECK-NOT: __riscv_zama16b {{.*$}} // CHECK-NOT: __riscv_zawrs {{.*$}} @@ -183,7 +184,6 @@ // CHECK-NOT: __riscv_ssnpm{{.*$}} // CHECK-NOT: __riscv_sspm{{.*$}} // CHECK-NOT: __riscv_supm{{.*$}} -// CHECK-NOT: __riscv_zacas {{.*$}} // CHECK-NOT: __riscv_zalasr {{.*$}} // CHECK-NOT: __riscv_zfbfmin {{.*$}} // CHECK-NOT: __riscv_zicfilp {{.*$}} @@ -751,6 +751,14 @@ // RUN: -o - | FileCheck --check-prefix=CHECK-ZABHA-EXT %s // CHECK-ZABHA-EXT: __riscv_zabha 1000000{{$}} +// RUN: %clang --target=riscv32 \ +// RUN: -march=rv32ia_zacas1p0 -E -dM %s \ +// RUN: -o - | FileCheck --check-prefix=CHECK-ZACAS-EXT %s +// RUN: %clang --target=riscv64 \ +// RUN: -march=rv64ia_zacas1p0 -E -dM %s \ +// RUN: -o - | FileCheck --check-prefix=CHECK-ZACAS-EXT %s +// CHECK-ZACAS-EXT: __riscv_zacas 1000000{{$}} + // RUN: %clang --target=riscv32 \ // RUN: -march=rv32i_zalrsc1p0 -E -dM %s \ // RUN: -o 
- | FileCheck --check-prefix=CHECK-ZALRSC-EXT %s
@@ -1630,14 +1638,6 @@
 // CHECK-ZVKT-EXT: __riscv_zvkt 1000000{{$}}
 
 // Experimental extensions
-// RUN: %clang --target=riscv32 -menable-experimental-extensions \
-// RUN:   -march=rv32ia_zacas1p0 -E -dM %s \
-// RUN:   -o - | FileCheck --check-prefix=CHECK-ZACAS-EXT %s
-// RUN: %clang --target=riscv64 -menable-experimental-extensions \
-// RUN:   -march=rv64ia_zacas1p0 -E -dM %s \
-// RUN:   -o - | FileCheck --check-prefix=CHECK-ZACAS-EXT %s
-// CHECK-ZACAS-EXT: __riscv_zacas 1000000{{$}}
-
 // RUN: %clang --target=riscv32 -menable-experimental-extensions \
 // RUN:   -march=rv32i_zalasr0p1 -E -dM %s \
 // RUN:   -o - | FileCheck --check-prefix=CHECK-ZALASR-EXT %s
diff --git a/llvm/docs/RISCVUsage.rst b/llvm/docs/RISCVUsage.rst
index f424004857454..7bea64792d472 100644
--- a/llvm/docs/RISCVUsage.rst
+++ b/llvm/docs/RISCVUsage.rst
@@ -154,6 +154,7 @@ on support follow.
      ``Za64rs``        Supported (`See note <#riscv-profiles-extensions-note>`__)
      ``Zaamo``         Assembly Support
      ``Zabha``         Supported
+     ``Zacas``         Supported (`See note <#riscv-zacas-note>`__)
      ``Zalrsc``        Assembly Support
      ``Zama16b``       Supported (`See note <#riscv-profiles-extensions-note>`__)
      ``Zawrs``         Assembly Support
@@ -287,6 +288,11 @@ Supported
 ``Za128rs``, ``Za64rs``, ``Zama16b``, ``Zic64b``, ``Ziccamoa``, ``Ziccif``, ``Zicclsm``, ``Ziccrse``, ``Shcounterenvw``, ``Shgatpa``, ``Shtvala``, ``Shvsatpa``, ``Shvstvala``, ``Shvstvecd``, ``Ssccptr``, ``Sscounterenw``, ``Ssstateen``, ``Ssstrict``, ``Sstvala``, ``Sstvecd``, ``Ssu64xl``, ``Svade``, ``Svbare``
   These extensions are defined as part of the `RISC-V Profiles specification `__. They do not introduce any new features themselves, but instead describe existing hardware features.
 
+.. _riscv-zacas-note:
+
+``Zacas``
+  The compiler will not generate amocas.d on RV32 or amocas.q on RV64 due to ABI compatibility. These can only be used in the assembler.
+
 Atomics ABIs
 ============
 
@@ -304,9 +310,6 @@ The primary goal of experimental support is to assist in the process of ratifica
 ``experimental-ssnpm``, ``experimental-smnpm``, ``experimental-smmpm``, ``experimental-sspm``, ``experimental-supm``
   LLVM implements the `v1.0.0-rc2 specification `__.
 
-``experimental-zacas``
-  LLVM implements the `1.0 release specification `__. amocas.w will be used for i32 cmpxchg. amocas.d will be used i64 cmpxchg on RV64. The compiler will not generate amocas.d on RV32 or amocas.q on RV64 due to ABI compatibilty. These can only be used in the assembler. The extension will be left as experimental until `an ABI issue `__ is resolved.
-
 ``experimental-zalasr``
   LLVM implements the `0.0.5 draft specification `__.
 
diff --git a/llvm/docs/ReleaseNotes.rst b/llvm/docs/ReleaseNotes.rst
index c85ea28ad9f8c..9bf838c39643d 100644
--- a/llvm/docs/ReleaseNotes.rst
+++ b/llvm/docs/ReleaseNotes.rst
@@ -149,6 +149,7 @@ Changes to the RISC-V Backend
 * The ``Zvbc32e`` and ``Zvkgs`` extensions are now supported experimentally.
 * Added ``Smctr`` and ``Ssctr`` extensions.
 * ``-mcpu=syntacore-scr7`` was added.
+* The ``Zacas`` extension is no longer marked as experimental.
Changes to the WebAssembly Backend ---------------------------------- diff --git a/llvm/lib/Target/RISCV/RISCVFeatures.td b/llvm/lib/Target/RISCV/RISCVFeatures.td index 52f5a637eb740..3d0e1dae801d3 100644 --- a/llvm/lib/Target/RISCV/RISCVFeatures.td +++ b/llvm/lib/Target/RISCV/RISCVFeatures.td @@ -243,8 +243,8 @@ def HasStdExtZabha : Predicate<"Subtarget->hasStdExtZabha()">, "'Zabha' (Byte and Halfword Atomic Memory Operations)">; def FeatureStdExtZacas - : RISCVExperimentalExtension<"zacas", 1, 0, - "'Zacas' (Atomic Compare-And-Swap Instructions)">, + : RISCVExtension<"zacas", 1, 0, + "'Zacas' (Atomic Compare-And-Swap Instructions)">, RISCVExtensionBitmask<0, 26>; def HasStdExtZacas : Predicate<"Subtarget->hasStdExtZacas()">, AssemblerPredicate<(all_of FeatureStdExtZacas), diff --git a/llvm/lib/TargetParser/Host.cpp b/llvm/lib/TargetParser/Host.cpp index b2c4f9ee00293..616e4eda1dd29 100644 --- a/llvm/lib/TargetParser/Host.cpp +++ b/llvm/lib/TargetParser/Host.cpp @@ -2040,8 +2040,7 @@ const StringMap sys::getHostCPUFeatures() { Features["zvfhmin"] = ExtMask & (1ULL << 31); // RISCV_HWPROBE_EXT_ZVFHMIN Features["zfa"] = ExtMask & (1ULL << 32); // RISCV_HWPROBE_EXT_ZFA Features["ztso"] = ExtMask & (1ULL << 33); // RISCV_HWPROBE_EXT_ZTSO - // TODO: Re-enable zacas when it is marked non-experimental again. - // Features["zacas"] = ExtMask & (1ULL << 34); // RISCV_HWPROBE_EXT_ZACAS + Features["zacas"] = ExtMask & (1ULL << 34); // RISCV_HWPROBE_EXT_ZACAS Features["zicond"] = ExtMask & (1ULL << 35); // RISCV_HWPROBE_EXT_ZICOND Features["zihintpause"] = ExtMask & (1ULL << 36); // RISCV_HWPROBE_EXT_ZIHINTPAUSE diff --git a/llvm/test/CodeGen/RISCV/atomic-cmpxchg-branch-on-result.ll b/llvm/test/CodeGen/RISCV/atomic-cmpxchg-branch-on-result.ll index e70ba93de75e0..234a956be809e 100644 --- a/llvm/test/CodeGen/RISCV/atomic-cmpxchg-branch-on-result.ll +++ b/llvm/test/CodeGen/RISCV/atomic-cmpxchg-branch-on-result.ll @@ -1,13 +1,13 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv32 -mattr=+a -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefixes=NOZACAS,RV32IA %s -; RUN: llc -mtriple=riscv32 -mattr=+a,+experimental-zacas -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv32 -mattr=+a,+zacas -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefixes=ZACAS,RV32IA-ZACAS %s ; RUN: llc -mtriple=riscv64 -mattr=+a -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefixes=NOZACAS,RV64IA %s -; RUN: llc -mtriple=riscv64 -mattr=+a,+experimental-zacas -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv64 -mattr=+a,+zacas -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefixes=ZACAS,RV64IA-ZACAS %s -; RUN: llc -mtriple=riscv64 -mattr=+a,+experimental-zacas,+zabha -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv64 -mattr=+a,+zacas,+zabha -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefixes=ZACAS,RV64IA-ZABHA %s ; Test cmpxchg followed by a branch on the cmpxchg success value to see if the diff --git a/llvm/test/CodeGen/RISCV/atomic-cmpxchg.ll b/llvm/test/CodeGen/RISCV/atomic-cmpxchg.ll index acd6e8f9afe2a..9908503adb9c3 100644 --- a/llvm/test/CodeGen/RISCV/atomic-cmpxchg.ll +++ b/llvm/test/CodeGen/RISCV/atomic-cmpxchg.ll @@ -3,25 +3,25 @@ ; RUN: | FileCheck -check-prefix=RV32I %s ; RUN: llc -mtriple=riscv32 -mattr=+a -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefixes=RV32IA,RV32IA-WMO %s -; RUN: llc -mtriple=riscv32 -mattr=+a,+experimental-zacas -verify-machineinstrs < %s \ +; RUN: llc 
-mtriple=riscv32 -mattr=+a,+zacas -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefixes=RV32IA,RV32IA-ZACAS,RV32IA-WMO-ZACAS %s ; RUN: llc -mtriple=riscv32 -mattr=+a,+ztso -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefixes=RV32IA,RV32IA-TSO %s -; RUN: llc -mtriple=riscv32 -mattr=+a,+ztso,+experimental-zacas -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv32 -mattr=+a,+ztso,+zacas -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefixes=RV32IA,RV32IA-ZACAS,RV32IA-TSO-ZACAS %s ; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefix=RV64I %s ; RUN: llc -mtriple=riscv64 -mattr=+a -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefixes=RV64IA,RV64IA-WMO %s -; RUN: llc -mtriple=riscv64 -mattr=+a,+experimental-zacas -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv64 -mattr=+a,+zacas -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefixes=RV64IA,RV64IA-ZACAS,RV64IA-WMO-ZACAS %s -; RUN: llc -mtriple=riscv64 -mattr=+a,+experimental-zacas,+zabha -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv64 -mattr=+a,+zacas,+zabha -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefixes=RV64IA,RV64IA-ZABHA,RV64IA-WMO-ZABHA %s ; RUN: llc -mtriple=riscv64 -mattr=+a,+ztso -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefixes=RV64IA,RV64IA-TSO %s -; RUN: llc -mtriple=riscv64 -mattr=+a,+ztso,+experimental-zacas -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv64 -mattr=+a,+ztso,+zacas -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefixes=RV64IA,RV64IA-ZACAS,RV64IA-TSO-ZACAS %s -; RUN: llc -mtriple=riscv64 -mattr=+a,+ztso,+experimental-zacas,+zabha -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv64 -mattr=+a,+ztso,+zacas,+zabha -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefixes=RV64IA,RV64IA-ZABHA,RV64IA-TSO-ZABHA %s define void @cmpxchg_i8_monotonic_monotonic(ptr %ptr, i8 %cmp, i8 %val) nounwind { diff --git a/llvm/test/CodeGen/RISCV/atomic-rmw.ll b/llvm/test/CodeGen/RISCV/atomic-rmw.ll index 03157e13bff78..f50744fc3c1f3 100644 --- a/llvm/test/CodeGen/RISCV/atomic-rmw.ll +++ b/llvm/test/CodeGen/RISCV/atomic-rmw.ll @@ -12,22 +12,22 @@ ; RUN: llc -mtriple=riscv64 -mattr=+a,+ztso -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefixes=RV64IA,RV64IA-NOZACAS,RV64IA-TSO,RV64IA-TSO-NOZACAS %s -; RUN: llc -mtriple=riscv32 -mattr=+a,+experimental-zacas -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv32 -mattr=+a,+zacas -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefixes=RV32IA,RV32IA-ZACAS,RV32IA-WMO,RV32IA-WMO-ZACAS %s -; RUN: llc -mtriple=riscv32 -mattr=+a,+ztso,+experimental-zacas -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv32 -mattr=+a,+ztso,+zacas -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefixes=RV32IA,RV32IA-ZACAS,RV32IA-TSO,RV32IA-TSO-ZACAS %s -; RUN: llc -mtriple=riscv64 -mattr=+a,+experimental-zacas -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv64 -mattr=+a,+zacas -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefixes=RV64IA,RV64IA-ZACAS,RV64IA-WMO,RV64IA-WMO-ZACAS %s -; RUN: llc -mtriple=riscv64 -mattr=+a,+ztso,+experimental-zacas -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv64 -mattr=+a,+ztso,+zacas -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefixes=RV64IA,RV64IA-ZACAS,RV64IA-TSO,RV64IA-TSO-ZACAS %s ; RUN: llc -mtriple=riscv64 -mattr=+a,+zabha -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefixes=RV64IA,RV64IA-WMO,RV64IA-WMO-ZABHA,RV64IA-WMO-ZABHA-NOZACAS %s ; 
RUN: llc -mtriple=riscv64 -mattr=+a,+ztso,+zabha -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefixes=RV64IA,RV64IA-TSO,RV64IA-TSO-ZABHA,RV64IA-TSO-ZABHA-NOZACAS %s -; RUN: llc -mtriple=riscv64 -mattr=+a,+zabha,+experimental-zacas -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv64 -mattr=+a,+zabha,+zacas -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefixes=RV64IA,RV64IA-WMO,RV64IA-WMO-ZABHA,RV64IA-WMO-ZABHA-ZACAS %s -; RUN: llc -mtriple=riscv64 -mattr=+a,+ztso,+zabha,+experimental-zacas -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv64 -mattr=+a,+ztso,+zabha,+zacas -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefixes=RV64IA,RV64IA-TSO,RV64IA-TSO-ZABHA,RV64IA-TSO-ZABHA-ZACAS %s define i8 @atomicrmw_xchg_i8_monotonic(ptr %a, i8 %b) nounwind { diff --git a/llvm/test/CodeGen/RISCV/atomic-signext.ll b/llvm/test/CodeGen/RISCV/atomic-signext.ll index c143be478948e..ed0a160d3f58a 100644 --- a/llvm/test/CodeGen/RISCV/atomic-signext.ll +++ b/llvm/test/CodeGen/RISCV/atomic-signext.ll @@ -3,13 +3,13 @@ ; RUN: | FileCheck -check-prefix=RV32I %s ; RUN: llc -mtriple=riscv32 -mattr=+a -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefixes=RV32IA,RV32IA-NOZACAS %s -; RUN: llc -mtriple=riscv32 -mattr=+a,+experimental-zacas -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv32 -mattr=+a,+zacas -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefixes=RV32IA,RV32IA-ZACAS %s ; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefix=RV64I %s ; RUN: llc -mtriple=riscv64 -mattr=+a -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefixes=RV64IA,RV64IA-NOZACAS %s -; RUN: llc -mtriple=riscv64 -mattr=+a,+experimental-zacas -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv64 -mattr=+a,+zacas -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefixes=RV64IA,RV64IA-ZACAS %s define signext i8 @atomic_load_i8_unordered(ptr %a) nounwind { diff --git a/llvm/test/CodeGen/RISCV/attributes.ll b/llvm/test/CodeGen/RISCV/attributes.ll index 1d4a634c89a22..86ce368bc1db6 100644 --- a/llvm/test/CodeGen/RISCV/attributes.ll +++ b/llvm/test/CodeGen/RISCV/attributes.ll @@ -121,7 +121,7 @@ ; RUN: llc -mtriple=riscv32 -mattr=+zfbfmin %s -o - | FileCheck --check-prefixes=CHECK,RV32ZFBFMIN %s ; RUN: llc -mtriple=riscv32 -mattr=+zvfbfmin %s -o - | FileCheck --check-prefixes=CHECK,RV32ZVFBFMIN %s ; RUN: llc -mtriple=riscv32 -mattr=+zvfbfwma %s -o - | FileCheck --check-prefixes=CHECK,RV32ZVFBFWMA %s -; RUN: llc -mtriple=riscv32 -mattr=+a,+experimental-zacas %s -o - | FileCheck --check-prefix=RV32ZACAS %s +; RUN: llc -mtriple=riscv32 -mattr=+a,+zacas %s -o - | FileCheck --check-prefix=RV32ZACAS %s ; RUN: llc -mtriple=riscv32 -mattr=+experimental-zalasr %s -o - | FileCheck --check-prefix=RV32ZALASR %s ; RUN: llc -mtriple=riscv32 -mattr=+zama16b %s -o - | FileCheck --check-prefixes=CHECK,RV32ZAMA16B %s ; RUN: llc -mtriple=riscv32 -mattr=+experimental-zicfilp %s -o - | FileCheck --check-prefix=RV32ZICFILP %s @@ -264,7 +264,7 @@ ; RUN: llc -mtriple=riscv64 -mattr=+zfbfmin %s -o - | FileCheck --check-prefixes=CHECK,RV64ZFBFMIN %s ; RUN: llc -mtriple=riscv64 -mattr=+zvfbfmin %s -o - | FileCheck --check-prefixes=CHECK,RV64ZVFBFMIN %s ; RUN: llc -mtriple=riscv64 -mattr=+zvfbfwma %s -o - | FileCheck --check-prefixes=CHECK,RV64ZVFBFWMA %s -; RUN: llc -mtriple=riscv64 -mattr=+a,+experimental-zacas %s -o - | FileCheck --check-prefix=RV64ZACAS %s +; RUN: llc -mtriple=riscv64 -mattr=+a,+zacas %s -o - | FileCheck --check-prefix=RV64ZACAS %s ; 
RUN: llc -mtriple=riscv64 -mattr=+experimental-zalasr %s -o - | FileCheck --check-prefix=RV64ZALASR %s ; RUN: llc -mtriple=riscv64 -mattr=+experimental-zicfilp %s -o - | FileCheck --check-prefix=RV64ZICFILP %s ; RUN: llc -mtriple=riscv64 -mattr=+a,+zabha %s -o - | FileCheck --check-prefix=RV64ZABHA %s diff --git a/llvm/test/MC/RISCV/rv32zacas-invalid.s b/llvm/test/MC/RISCV/rv32zacas-invalid.s index bad2edcaaa915..6927a2733b8e6 100644 --- a/llvm/test/MC/RISCV/rv32zacas-invalid.s +++ b/llvm/test/MC/RISCV/rv32zacas-invalid.s @@ -1,4 +1,4 @@ -# RUN: not llvm-mc -triple riscv32 -mattr=+a,+experimental-zacas < %s 2>&1 | FileCheck %s +# RUN: not llvm-mc -triple riscv32 -mattr=+a,+zacas < %s 2>&1 | FileCheck %s # Non-zero offsets not supported for the third operand (rs1). amocas.w a1, a3, 1(a5) # CHECK: :[[@LINE]]:18: error: optional integer offset must be 0 diff --git a/llvm/test/MC/RISCV/rv32zacas-valid.s b/llvm/test/MC/RISCV/rv32zacas-valid.s index 8ba2b02542bc0..0e76f02399483 100644 --- a/llvm/test/MC/RISCV/rv32zacas-valid.s +++ b/llvm/test/MC/RISCV/rv32zacas-valid.s @@ -1,12 +1,12 @@ -# RUN: llvm-mc %s -triple=riscv32 -mattr=+a,+experimental-zacas -riscv-no-aliases -show-encoding \ +# RUN: llvm-mc %s -triple=riscv32 -mattr=+a,+zacas -riscv-no-aliases -show-encoding \ # RUN: | FileCheck -check-prefixes=CHECK-ASM,CHECK-ASM-AND-OBJ %s -# RUN: llvm-mc %s -triple=riscv64 -mattr=+a,+experimental-zacas -riscv-no-aliases -show-encoding \ +# RUN: llvm-mc %s -triple=riscv64 -mattr=+a,+zacas -riscv-no-aliases -show-encoding \ # RUN: | FileCheck -check-prefixes=CHECK-ASM,CHECK-ASM-AND-OBJ %s -# RUN: llvm-mc -filetype=obj -triple=riscv32 -mattr=+a,+experimental-zacas < %s \ -# RUN: | llvm-objdump --mattr=+a,+experimental-zacas -M no-aliases -d -r - \ +# RUN: llvm-mc -filetype=obj -triple=riscv32 -mattr=+a,+zacas < %s \ +# RUN: | llvm-objdump --mattr=+a,+zacas -M no-aliases -d -r - \ # RUN: | FileCheck --check-prefix=CHECK-ASM-AND-OBJ %s -# RUN: llvm-mc -filetype=obj -triple=riscv64 -mattr=+a,+experimental-zacas < %s \ -# RUN: | llvm-objdump --mattr=+a,+experimental-zacas -M no-aliases -d -r - \ +# RUN: llvm-mc -filetype=obj -triple=riscv64 -mattr=+a,+zacas < %s \ +# RUN: | llvm-objdump --mattr=+a,+zacas -M no-aliases -d -r - \ # RUN: | FileCheck --check-prefix=CHECK-ASM-AND-OBJ %s # RUN: not llvm-mc -triple=riscv32 -mattr=+a -show-encoding %s 2>&1 \ # RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/RISCV/rv64zacas-invalid.s b/llvm/test/MC/RISCV/rv64zacas-invalid.s index 854e6fe308b0a..e75ff9e9f94ca 100644 --- a/llvm/test/MC/RISCV/rv64zacas-invalid.s +++ b/llvm/test/MC/RISCV/rv64zacas-invalid.s @@ -1,4 +1,4 @@ -# RUN: not llvm-mc -triple riscv64 -mattr=+a,+experimental-zacas < %s 2>&1 | FileCheck %s +# RUN: not llvm-mc -triple riscv64 -mattr=+a,+zacas < %s 2>&1 | FileCheck %s # Non-zero offsets not supported for the third operand (rs1). 
amocas.w a1, a3, 1(a5) # CHECK: :[[@LINE]]:18: error: optional integer offset must be 0 diff --git a/llvm/test/MC/RISCV/rv64zacas-valid.s b/llvm/test/MC/RISCV/rv64zacas-valid.s index d5044a0e0671d..595c70b6e3f5b 100644 --- a/llvm/test/MC/RISCV/rv64zacas-valid.s +++ b/llvm/test/MC/RISCV/rv64zacas-valid.s @@ -1,7 +1,7 @@ -# RUN: llvm-mc %s -triple=riscv64 -mattr=+a,+experimental-zacas -riscv-no-aliases -show-encoding \ +# RUN: llvm-mc %s -triple=riscv64 -mattr=+a,+zacas -riscv-no-aliases -show-encoding \ # RUN: | FileCheck -check-prefixes=CHECK-ASM,CHECK-ASM-AND-OBJ %s -# RUN: llvm-mc -filetype=obj -triple=riscv64 -mattr=+a,+experimental-zacas < %s \ -# RUN: | llvm-objdump --mattr=+a,+experimental-zacas -M no-aliases -d -r - \ +# RUN: llvm-mc -filetype=obj -triple=riscv64 -mattr=+a,+zacas < %s \ +# RUN: | llvm-objdump --mattr=+a,+zacas -M no-aliases -d -r - \ # RUN: | FileCheck --check-prefix=CHECK-ASM-AND-OBJ %s # RUN: not llvm-mc -triple=riscv64 -mattr=+a -show-encoding %s 2>&1 \ # RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/test/MC/RISCV/rvzabha-zacas-valid.s b/llvm/test/MC/RISCV/rvzabha-zacas-valid.s index f8aa6867aedc6..97afb9d6563e5 100644 --- a/llvm/test/MC/RISCV/rvzabha-zacas-valid.s +++ b/llvm/test/MC/RISCV/rvzabha-zacas-valid.s @@ -1,12 +1,12 @@ -# RUN: llvm-mc %s -triple=riscv32 -mattr=+a,+zabha,+experimental-zacas -riscv-no-aliases -show-encoding \ +# RUN: llvm-mc %s -triple=riscv32 -mattr=+a,+zabha,+zacas -riscv-no-aliases -show-encoding \ # RUN: | FileCheck -check-prefixes=CHECK-ASM,CHECK-ASM-AND-OBJ %s -# RUN: llvm-mc %s -triple=riscv64 -mattr=+a,+zabha,+experimental-zacas -riscv-no-aliases -show-encoding \ +# RUN: llvm-mc %s -triple=riscv64 -mattr=+a,+zabha,+zacas -riscv-no-aliases -show-encoding \ # RUN: | FileCheck -check-prefixes=CHECK-ASM,CHECK-ASM-AND-OBJ %s -# RUN: llvm-mc -filetype=obj -triple=riscv32 -mattr=+a,+zabha,+experimental-zacas < %s \ -# RUN: | llvm-objdump --mattr=+a,+zabha,+experimental-zacas -M no-aliases -d -r - \ +# RUN: llvm-mc -filetype=obj -triple=riscv32 -mattr=+a,+zabha,+zacas < %s \ +# RUN: | llvm-objdump --mattr=+a,+zabha,+zacas -M no-aliases -d -r - \ # RUN: | FileCheck --check-prefix=CHECK-ASM-AND-OBJ %s -# RUN: llvm-mc -filetype=obj -triple=riscv64 -mattr=+a,+zabha,+experimental-zacas < %s \ -# RUN: | llvm-objdump --mattr=+a,+zabha,+experimental-zacas -M no-aliases -d -r - \ +# RUN: llvm-mc -filetype=obj -triple=riscv64 -mattr=+a,+zabha,+zacas < %s \ +# RUN: | llvm-objdump --mattr=+a,+zabha,+zacas -M no-aliases -d -r - \ # RUN: | FileCheck --check-prefix=CHECK-ASM-AND-OBJ %s # RUN: not llvm-mc -triple=riscv32 -mattr=+a,+zabha -show-encoding %s 2>&1 \ # RUN: | FileCheck %s --check-prefix=CHECK-ERROR diff --git a/llvm/unittests/TargetParser/RISCVISAInfoTest.cpp b/llvm/unittests/TargetParser/RISCVISAInfoTest.cpp index 6662421eb26d9..33944b64dc157 100644 --- a/llvm/unittests/TargetParser/RISCVISAInfoTest.cpp +++ b/llvm/unittests/TargetParser/RISCVISAInfoTest.cpp @@ -980,6 +980,7 @@ R"(All available -march extensions for RISC-V za64rs 1.0 zaamo 1.0 zabha 1.0 + zacas 1.0 zalrsc 1.0 zama16b 1.0 zawrs 1.0 @@ -1116,7 +1117,6 @@ R"(All available -march extensions for RISC-V Experimental extensions zicfilp 1.0 This is a long dummy description zicfiss 1.0 - zacas 1.0 zalasr 0.1 zvbc32e 0.7 zvkgs 0.7 From f586b1e3f42788025aa6f55be70c5e361cc8b529 Mon Sep 17 00:00:00 2001 From: Tzung-Han Juang Date: Wed, 25 Sep 2024 01:27:21 -0400 Subject: [PATCH 091/212] [MLIR] Make `OneShotModuleBufferize` use `OpInterface` (#107295) **Description:** 
`OneShotModuleBufferize` deals with the bufferization of `FuncOp`, `CallOp` and `ReturnOp` but they are hard-coded. Any custom function-like operations will not be handled. The PR replaces a part of `FuncOp` and `CallOp` with `FunctionOpInterface` and `CallOpInterface` in `OneShotModuleBufferize` so that custom function ops and call ops can be bufferized. **Related Discord Discussion:** [Link](https://discord.com/channels/636084430946959380/642426447167881246/1280556809911799900) --------- Co-authored-by: erick-xanadu <110487834+erick-xanadu@users.noreply.github.com> --- .../IR/BufferizableOpInterface.h | 7 +- .../FuncBufferizableOpInterfaceImpl.h | 12 +- .../IR/BufferizableOpInterface.cpp | 5 +- .../FuncBufferizableOpInterfaceImpl.cpp | 2 +- .../Transforms/OneShotModuleBufferize.cpp | 111 +++++++------ .../Transforms/transform-ops.mlir | 142 +++++++++-------- mlir/test/Dialect/LLVM/transform-e2e.mlir | 22 +-- .../Linalg/matmul-shared-memory-padding.mlir | 52 +++--- .../Linalg/pad-to-specific-memory-space.mlir | 148 +++++++++--------- .../test/Dialect/Vector/transform-vector.mlir | 84 +++++----- mlir/test/Examples/transform/ChH/full.mlir | 11 +- 11 files changed, 315 insertions(+), 281 deletions(-) diff --git a/mlir/include/mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h b/mlir/include/mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h index aceb9d059b95f..d19687ec9afee 100644 --- a/mlir/include/mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h +++ b/mlir/include/mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h @@ -11,6 +11,7 @@ #include "mlir/IR/Operation.h" #include "mlir/IR/PatternMatch.h" +#include "mlir/Interfaces/FunctionInterfaces.h" #include "mlir/Support/LLVM.h" #include "llvm/ADT/DenseMapInfoVariant.h" #include "llvm/ADT/SetVector.h" @@ -260,9 +261,9 @@ struct BufferizationOptions { using AnalysisStateInitFn = std::function; /// Tensor -> MemRef type converter. /// Parameters: Value, memory space, func op, bufferization options - using FunctionArgTypeConverterFn = - std::function; + using FunctionArgTypeConverterFn = std::function; /// Tensor -> MemRef type converter. /// Parameters: Value, memory space, bufferization options using UnknownTypeConverterFn = std::function equivalentFuncArgs; + DenseMap equivalentFuncArgs; /// A mapping of FuncOp BBArg indices to aliasing ReturnOp OpOperand indices. - DenseMap aliasingReturnVals; + DenseMap aliasingReturnVals; /// A set of all read BlockArguments of FuncOps. - DenseMap readBbArgs; + DenseMap readBbArgs; /// A set of all written-to BlockArguments of FuncOps. - DenseMap writtenBbArgs; + DenseMap writtenBbArgs; /// Keep track of which FuncOps are fully analyzed or currently being /// analyzed. - DenseMap analyzedFuncOps; + DenseMap analyzedFuncOps; /// This function is called right before analyzing the given FuncOp. It /// initializes the data structures for the FuncOp in this state object. 
- void startFunctionAnalysis(FuncOp funcOp); + void startFunctionAnalysis(FunctionOpInterface funcOp); }; void registerBufferizableOpInterfaceExternalModels(DialectRegistry ®istry); diff --git a/mlir/lib/Dialect/Bufferization/IR/BufferizableOpInterface.cpp b/mlir/lib/Dialect/Bufferization/IR/BufferizableOpInterface.cpp index 85604eef2f283..92f757111cbaf 100644 --- a/mlir/lib/Dialect/Bufferization/IR/BufferizableOpInterface.cpp +++ b/mlir/lib/Dialect/Bufferization/IR/BufferizableOpInterface.cpp @@ -18,6 +18,7 @@ #include "mlir/IR/TypeUtilities.h" #include "mlir/IR/Value.h" #include "mlir/Interfaces/ControlFlowInterfaces.h" +#include "mlir/Interfaces/FunctionInterfaces.h" #include "llvm/ADT/ScopeExit.h" #include "llvm/Support/Debug.h" @@ -314,7 +315,7 @@ namespace { /// Default function arg type converter: Use a fully dynamic layout map. BaseMemRefType defaultFunctionArgTypeConverter(TensorType type, Attribute memorySpace, - func::FuncOp funcOp, + FunctionOpInterface funcOp, const BufferizationOptions &options) { return getMemRefTypeWithFullyDynamicLayout(type, memorySpace); } @@ -361,7 +362,7 @@ BufferizationOptions::dynCastBufferizableOp(Value value) const { void BufferizationOptions::setFunctionBoundaryTypeConversion( LayoutMapOption layoutMapOption) { functionArgTypeConverterFn = [=](TensorType tensorType, Attribute memorySpace, - func::FuncOp funcOp, + FunctionOpInterface funcOp, const BufferizationOptions &options) { if (layoutMapOption == LayoutMapOption::IdentityLayoutMap) return bufferization::getMemRefTypeWithStaticIdentityLayout(tensorType, diff --git a/mlir/lib/Dialect/Bufferization/Transforms/FuncBufferizableOpInterfaceImpl.cpp b/mlir/lib/Dialect/Bufferization/Transforms/FuncBufferizableOpInterfaceImpl.cpp index 9fbe574ec392d..9749a71f3514b 100644 --- a/mlir/lib/Dialect/Bufferization/Transforms/FuncBufferizableOpInterfaceImpl.cpp +++ b/mlir/lib/Dialect/Bufferization/Transforms/FuncBufferizableOpInterfaceImpl.cpp @@ -22,7 +22,7 @@ namespace mlir { namespace bufferization { namespace func_ext { -void FuncAnalysisState::startFunctionAnalysis(FuncOp funcOp) { +void FuncAnalysisState::startFunctionAnalysis(FunctionOpInterface funcOp) { analyzedFuncOps[funcOp] = FuncOpAnalysisState::InProgress; auto createdEquiv = equivalentFuncArgs.try_emplace(funcOp, IndexMapping()); auto createdAliasingResults = diff --git a/mlir/lib/Dialect/Bufferization/Transforms/OneShotModuleBufferize.cpp b/mlir/lib/Dialect/Bufferization/Transforms/OneShotModuleBufferize.cpp index 0a4072605c265..f934ae88277b6 100644 --- a/mlir/lib/Dialect/Bufferization/Transforms/OneShotModuleBufferize.cpp +++ b/mlir/lib/Dialect/Bufferization/Transforms/OneShotModuleBufferize.cpp @@ -75,7 +75,7 @@ using namespace mlir::bufferization; using namespace mlir::bufferization::func_ext; /// A mapping of FuncOps to their callers. -using FuncCallerMap = DenseMap>; +using FuncCallerMap = DenseMap>; /// Get or create FuncAnalysisState. static FuncAnalysisState & @@ -88,10 +88,11 @@ getOrCreateFuncAnalysisState(OneShotAnalysisState &state) { /// Return the unique ReturnOp that terminates `funcOp`. /// Return nullptr if there is no such unique ReturnOp. 
-static func::ReturnOp getAssumedUniqueReturnOp(func::FuncOp funcOp) { - func::ReturnOp returnOp; - for (Block &b : funcOp.getBody()) { - if (auto candidateOp = dyn_cast(b.getTerminator())) { +static Operation *getAssumedUniqueReturnOp(FunctionOpInterface funcOp) { + Operation *returnOp = nullptr; + for (Block &b : funcOp.getFunctionBody()) { + auto candidateOp = b.getTerminator(); + if (candidateOp && candidateOp->hasTrait()) { if (returnOp) return nullptr; returnOp = candidateOp; @@ -126,16 +127,16 @@ static void annotateEquivalentReturnBbArg(OpOperand &returnVal, /// Store function BlockArguments that are equivalent to/aliasing a returned /// value in FuncAnalysisState. static LogicalResult -aliasingFuncOpBBArgsAnalysis(FuncOp funcOp, OneShotAnalysisState &state, +aliasingFuncOpBBArgsAnalysis(FunctionOpInterface funcOp, + OneShotAnalysisState &state, FuncAnalysisState &funcState) { - if (funcOp.getBody().empty()) { + if (funcOp.getFunctionBody().empty()) { // No function body available. Conservatively assume that every tensor // return value may alias with any tensor bbArg. - FunctionType type = funcOp.getFunctionType(); - for (const auto &inputIt : llvm::enumerate(type.getInputs())) { + for (const auto &inputIt : llvm::enumerate(funcOp.getArgumentTypes())) { if (!isa(inputIt.value())) continue; - for (const auto &resultIt : llvm::enumerate(type.getResults())) { + for (const auto &resultIt : llvm::enumerate(funcOp.getResultTypes())) { if (!isa(resultIt.value())) continue; int64_t returnIdx = resultIt.index(); @@ -147,7 +148,7 @@ aliasingFuncOpBBArgsAnalysis(FuncOp funcOp, OneShotAnalysisState &state, } // Support only single return-terminated block in the function. - func::ReturnOp returnOp = getAssumedUniqueReturnOp(funcOp); + Operation *returnOp = getAssumedUniqueReturnOp(funcOp); assert(returnOp && "expected func with single return op"); for (OpOperand &returnVal : returnOp->getOpOperands()) @@ -168,8 +169,8 @@ aliasingFuncOpBBArgsAnalysis(FuncOp funcOp, OneShotAnalysisState &state, return success(); } -static void annotateFuncArgAccess(func::FuncOp funcOp, int64_t idx, bool isRead, - bool isWritten) { +static void annotateFuncArgAccess(FunctionOpInterface funcOp, int64_t idx, + bool isRead, bool isWritten) { OpBuilder b(funcOp.getContext()); Attribute accessType; if (isRead && isWritten) { @@ -189,12 +190,12 @@ static void annotateFuncArgAccess(func::FuncOp funcOp, int64_t idx, bool isRead, /// function with unknown ops, we conservatively assume that such ops bufferize /// to a read + write. static LogicalResult -funcOpBbArgReadWriteAnalysis(FuncOp funcOp, OneShotAnalysisState &state, +funcOpBbArgReadWriteAnalysis(FunctionOpInterface funcOp, + OneShotAnalysisState &state, FuncAnalysisState &funcState) { - for (int64_t idx = 0, e = funcOp.getFunctionType().getNumInputs(); idx < e; - ++idx) { + for (int64_t idx = 0, e = funcOp.getNumArguments(); idx < e; ++idx) { // Skip non-tensor arguments. - if (!isa(funcOp.getFunctionType().getInput(idx))) + if (!isa(funcOp.getArgumentTypes()[idx])) continue; bool isRead; bool isWritten; @@ -204,7 +205,7 @@ funcOpBbArgReadWriteAnalysis(FuncOp funcOp, OneShotAnalysisState &state, StringRef str = accessAttr.getValue(); isRead = str == "read" || str == "read-write"; isWritten = str == "write" || str == "read-write"; - } else if (funcOp.getBody().empty()) { + } else if (funcOp.getFunctionBody().empty()) { // If the function has no body, conservatively assume that all args are // read + written. 
isRead = true; @@ -230,20 +231,19 @@ funcOpBbArgReadWriteAnalysis(FuncOp funcOp, OneShotAnalysisState &state, /// Remove bufferization attributes on FuncOp arguments. static void removeBufferizationAttributes(BlockArgument bbArg) { - auto funcOp = cast(bbArg.getOwner()->getParentOp()); + auto funcOp = cast(bbArg.getOwner()->getParentOp()); funcOp.removeArgAttr(bbArg.getArgNumber(), BufferizationDialect::kBufferLayoutAttrName); funcOp.removeArgAttr(bbArg.getArgNumber(), BufferizationDialect::kWritableAttrName); } -/// Return the func::FuncOp called by `callOp`. -static func::FuncOp getCalledFunction(func::CallOp callOp) { +static FunctionOpInterface getCalledFunction(CallOpInterface callOp) { SymbolRefAttr sym = llvm::dyn_cast_if_present(callOp.getCallableForCallee()); if (!sym) return nullptr; - return dyn_cast_or_null( + return dyn_cast_or_null( SymbolTable::lookupNearestSymbolFrom(callOp, sym)); } @@ -251,12 +251,12 @@ static func::FuncOp getCalledFunction(func::CallOp callOp) { /// Note: This only adds new equivalence info if the called function was already /// analyzed. // TODO: This does not handle cyclic function call graphs etc. -static void equivalenceAnalysis(func::FuncOp funcOp, +static void equivalenceAnalysis(FunctionOpInterface funcOp, OneShotAnalysisState &state, FuncAnalysisState &funcState) { - funcOp->walk([&](func::CallOp callOp) { - func::FuncOp calledFunction = getCalledFunction(callOp); - assert(calledFunction && "could not retrieved called func::FuncOp"); + funcOp->walk([&](CallOpInterface callOp) { + FunctionOpInterface calledFunction = getCalledFunction(callOp); + assert(calledFunction && "could not retrieved called FunctionOpInterface"); // No equivalence info available for the called function. if (!funcState.equivalentFuncArgs.count(calledFunction)) @@ -267,7 +267,7 @@ static void equivalenceAnalysis(func::FuncOp funcOp, int64_t bbargIdx = it.second; if (!state.isInPlace(callOp->getOpOperand(bbargIdx))) continue; - Value returnVal = callOp.getResult(returnIdx); + Value returnVal = callOp->getResult(returnIdx); Value argVal = callOp->getOperand(bbargIdx); state.unionEquivalenceClasses(returnVal, argVal); } @@ -277,11 +277,9 @@ static void equivalenceAnalysis(func::FuncOp funcOp, } /// Return "true" if the given function signature has tensor semantics. -static bool hasTensorSignature(func::FuncOp funcOp) { - return llvm::any_of(funcOp.getFunctionType().getInputs(), - llvm::IsaPred) || - llvm::any_of(funcOp.getFunctionType().getResults(), - llvm::IsaPred); +static bool hasTensorSignature(FunctionOpInterface funcOp) { + return llvm::any_of(funcOp.getArgumentTypes(), llvm::IsaPred) || + llvm::any_of(funcOp.getResultTypes(), llvm::IsaPred); } /// Store all functions of the `moduleOp` in `orderedFuncOps`, sorted by @@ -291,16 +289,16 @@ static bool hasTensorSignature(func::FuncOp funcOp) { /// retrieve the called FuncOp from any func::CallOp. static LogicalResult getFuncOpsOrderedByCalls(ModuleOp moduleOp, - SmallVectorImpl &orderedFuncOps, + SmallVectorImpl &orderedFuncOps, FuncCallerMap &callerMap) { // For each FuncOp, the set of functions called by it (i.e. the union of // symbols of all nested func::CallOp). - DenseMap> calledBy; + DenseMap> calledBy; // For each FuncOp, the number of func::CallOp it contains. 
- DenseMap numberCallOpsContainedInFuncOp; - WalkResult res = moduleOp.walk([&](func::FuncOp funcOp) -> WalkResult { - if (!funcOp.getBody().empty()) { - func::ReturnOp returnOp = getAssumedUniqueReturnOp(funcOp); + DenseMap numberCallOpsContainedInFuncOp; + WalkResult res = moduleOp.walk([&](FunctionOpInterface funcOp) -> WalkResult { + if (!funcOp.getFunctionBody().empty()) { + Operation *returnOp = getAssumedUniqueReturnOp(funcOp); if (!returnOp) return funcOp->emitError() << "cannot bufferize a FuncOp with tensors and " @@ -309,9 +307,10 @@ getFuncOpsOrderedByCalls(ModuleOp moduleOp, // Collect function calls and populate the caller map. numberCallOpsContainedInFuncOp[funcOp] = 0; - return funcOp.walk([&](func::CallOp callOp) -> WalkResult { - func::FuncOp calledFunction = getCalledFunction(callOp); - assert(calledFunction && "could not retrieved called func::FuncOp"); + return funcOp.walk([&](CallOpInterface callOp) -> WalkResult { + FunctionOpInterface calledFunction = getCalledFunction(callOp); + assert(calledFunction && + "could not retrieved called FunctionOpInterface"); // If the called function does not have any tensors in its signature, then // it is not necessary to bufferize the callee before the caller. if (!hasTensorSignature(calledFunction)) @@ -349,11 +348,11 @@ getFuncOpsOrderedByCalls(ModuleOp moduleOp, /// most generic layout map as function return types. After bufferizing the /// entire function body, a more concise memref type can potentially be used for /// the return type of the function. -static void foldMemRefCasts(func::FuncOp funcOp) { - if (funcOp.getBody().empty()) +static void foldMemRefCasts(FunctionOpInterface funcOp) { + if (funcOp.getFunctionBody().empty()) return; - func::ReturnOp returnOp = getAssumedUniqueReturnOp(funcOp); + Operation *returnOp = getAssumedUniqueReturnOp(funcOp); SmallVector resultTypes; for (OpOperand &operand : returnOp->getOpOperands()) { @@ -365,8 +364,8 @@ static void foldMemRefCasts(func::FuncOp funcOp) { } } - auto newFuncType = FunctionType::get( - funcOp.getContext(), funcOp.getFunctionType().getInputs(), resultTypes); + auto newFuncType = FunctionType::get(funcOp.getContext(), + funcOp.getArgumentTypes(), resultTypes); funcOp.setType(newFuncType); } @@ -379,7 +378,7 @@ mlir::bufferization::analyzeModuleOp(ModuleOp moduleOp, FuncAnalysisState &funcState = getOrCreateFuncAnalysisState(state); // A list of functions in the order in which they are analyzed + bufferized. - SmallVector orderedFuncOps; + SmallVector orderedFuncOps; // A mapping of FuncOps to their callers. FuncCallerMap callerMap; @@ -388,7 +387,7 @@ mlir::bufferization::analyzeModuleOp(ModuleOp moduleOp, return failure(); // Analyze ops. - for (func::FuncOp funcOp : orderedFuncOps) { + for (FunctionOpInterface funcOp : orderedFuncOps) { if (!state.getOptions().isOpAllowed(funcOp)) continue; @@ -416,7 +415,7 @@ mlir::bufferization::analyzeModuleOp(ModuleOp moduleOp, void mlir::bufferization::removeBufferizationAttributesInModule( ModuleOp moduleOp) { - moduleOp.walk([&](func::FuncOp op) { + moduleOp.walk([&](FunctionOpInterface op) { for (BlockArgument bbArg : op.getArguments()) removeBufferizationAttributes(bbArg); }); @@ -430,7 +429,7 @@ LogicalResult mlir::bufferization::bufferizeModuleOp( IRRewriter rewriter(moduleOp.getContext()); // A list of functions in the order in which they are analyzed + bufferized. - SmallVector orderedFuncOps; + SmallVector orderedFuncOps; // A mapping of FuncOps to their callers. 
FuncCallerMap callerMap; @@ -439,11 +438,11 @@ LogicalResult mlir::bufferization::bufferizeModuleOp( return failure(); // Bufferize functions. - for (func::FuncOp funcOp : orderedFuncOps) { + for (FunctionOpInterface funcOp : orderedFuncOps) { // Note: It would be good to apply cleanups here but we cannot as aliasInfo // would be invalidated. - if (llvm::is_contained(options.noAnalysisFuncFilter, funcOp.getSymName())) { + if (llvm::is_contained(options.noAnalysisFuncFilter, funcOp.getName())) { // This function was not analyzed and RaW conflicts were not resolved. // Buffer copies must be inserted before every write. OneShotBufferizationOptions updatedOptions = options; @@ -463,7 +462,7 @@ LogicalResult mlir::bufferization::bufferizeModuleOp( // Bufferize all other ops. for (Operation &op : llvm::make_early_inc_range(moduleOp.getOps())) { // Functions were already bufferized. - if (isa(&op)) + if (isa(&op)) continue; if (failed(bufferizeOp(&op, options, statistics))) return failure(); @@ -490,12 +489,12 @@ LogicalResult mlir::bufferization::runOneShotModuleBufferize( // FuncOps whose names are specified in options.noAnalysisFuncFilter will // not be analyzed. Ops in these FuncOps will not be analyzed as well. OpFilter::Entry::FilterFn analysisFilterFn = [=](Operation *op) { - auto func = dyn_cast(op); + auto func = dyn_cast(op); if (!func) - func = op->getParentOfType(); + func = op->getParentOfType(); if (func) return llvm::is_contained(options.noAnalysisFuncFilter, - func.getSymName()); + func.getName()); return false; }; OneShotBufferizationOptions updatedOptions(options); diff --git a/mlir/test/Dialect/Bufferization/Transforms/transform-ops.mlir b/mlir/test/Dialect/Bufferization/Transforms/transform-ops.mlir index 3c50a9e72d9d9..588aa8a85a84e 100644 --- a/mlir/test/Dialect/Bufferization/Transforms/transform-ops.mlir +++ b/mlir/test/Dialect/Bufferization/Transforms/transform-ops.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt --transform-interpreter %s -split-input-file -verify-diagnostics | FileCheck %s +// RUN: mlir-opt --transform-interpreter="debug-payload-root-tag=payload" %s -split-input-file -verify-diagnostics | FileCheck %s // Test One-Shot Bufferize. 
@@ -12,19 +12,21 @@ module attributes {transform.with_named_sequence} { // CHECK-LABEL: func @test_function( // CHECK-SAME: %[[A:.*]]: tensor -func.func @test_function(%A : tensor, %v : vector<4xf32>) -> (tensor) { - %c0 = arith.constant 0 : index +module @payload attributes { transform.target_tag = "payload" } { + func.func @test_function(%A : tensor, %v : vector<4xf32>) -> (tensor) { + %c0 = arith.constant 0 : index - // CHECK: %[[A_memref:.*]] = bufferization.to_memref %[[A]] - // CHECK: %[[dim:.*]] = memref.dim %[[A_memref]] - // CHECK: %[[alloc:.*]] = memref.alloc(%[[dim]]) - // CHECK: memref.copy %[[A_memref]], %[[alloc]] - // CHECK: vector.transfer_write %{{.*}}, %[[alloc]] - // CHECK: %[[res_tensor:.*]] = bufferization.to_tensor %[[alloc]] - %0 = vector.transfer_write %v, %A[%c0] : vector<4xf32>, tensor + // CHECK: %[[A_memref:.*]] = bufferization.to_memref %[[A]] + // CHECK: %[[dim:.*]] = memref.dim %[[A_memref]] + // CHECK: %[[alloc:.*]] = memref.alloc(%[[dim]]) + // CHECK: memref.copy %[[A_memref]], %[[alloc]] + // CHECK: vector.transfer_write %{{.*}}, %[[alloc]] + // CHECK: %[[res_tensor:.*]] = bufferization.to_tensor %[[alloc]] + %0 = vector.transfer_write %v, %A[%c0] : vector<4xf32>, tensor - // CHECK: return %[[res_tensor]] - return %0 : tensor + // CHECK: return %[[res_tensor]] + return %0 : tensor + } } // ----- @@ -42,19 +44,21 @@ module attributes {transform.with_named_sequence} { // CHECK-LABEL: func @test_function( // CHECK-SAME: %[[A:.*]]: tensor // CHECK-NOT: memref.copy -func.func @test_function(%A : tensor, %v : vector<4xf32>) -> (tensor) { - %c0 = arith.constant 0 : index +module @payload attributes { transform.target_tag = "payload" } { + func.func @test_function(%A : tensor, %v : vector<4xf32>) -> (tensor) { + %c0 = arith.constant 0 : index - // CHECK: %[[A_memref:.*]] = bufferization.to_memref %[[A]] - // CHECK: %[[dim:.*]] = memref.dim %[[A_memref]] - // CHECK: %[[alloc:.*]] = memref.alloc(%[[dim]]) - // CHECK: linalg.copy ins(%[[A_memref]] : memref<{{.*}}>) outs(%[[alloc]] - // CHECK: vector.transfer_write %{{.*}}, %[[alloc]] - // CHECK: %[[res_tensor:.*]] = bufferization.to_tensor %[[alloc]] - %0 = vector.transfer_write %v, %A[%c0] : vector<4xf32>, tensor + // CHECK: %[[A_memref:.*]] = bufferization.to_memref %[[A]] + // CHECK: %[[dim:.*]] = memref.dim %[[A_memref]] + // CHECK: %[[alloc:.*]] = memref.alloc(%[[dim]]) + // CHECK: linalg.copy ins(%[[A_memref]] : memref<{{.*}}>) outs(%[[alloc]] + // CHECK: vector.transfer_write %{{.*}}, %[[alloc]] + // CHECK: %[[res_tensor:.*]] = bufferization.to_tensor %[[alloc]] + %0 = vector.transfer_write %v, %A[%c0] : vector<4xf32>, tensor - // CHECK: return %[[res_tensor]] - return %0 : tensor + // CHECK: return %[[res_tensor]] + return %0 : tensor + } } // ----- @@ -72,13 +76,15 @@ module attributes {transform.with_named_sequence} { // CHECK-LABEL: func @test_function_analysis( // CHECK-SAME: %[[A:.*]]: tensor -func.func @test_function_analysis(%A : tensor, %v : vector<4xf32>) -> (tensor) { - %c0 = arith.constant 0 : index - // CHECK: vector.transfer_write - // CHECK-SAME: {__inplace_operands_attr__ = ["none", "false", "none"]} - // CHECK-SAME: tensor - %0 = vector.transfer_write %v, %A[%c0] : vector<4xf32>, tensor - return %0 : tensor +module @payload attributes { transform.target_tag = "payload" } { + func.func @test_function_analysis(%A : tensor, %v : vector<4xf32>) -> (tensor) { + %c0 = arith.constant 0 : index + // CHECK: vector.transfer_write + // CHECK-SAME: {__inplace_operands_attr__ = ["none", "false", "none"]} + 
// CHECK-SAME: tensor + %0 = vector.transfer_write %v, %A[%c0] : vector<4xf32>, tensor + return %0 : tensor + } } // ----- @@ -95,10 +101,12 @@ module attributes {transform.with_named_sequence} { } } -func.func @test_unknown_op_failure() -> (tensor) { - // expected-error @+1 {{op was not bufferized}} - %0 = "test.dummy_op"() : () -> (tensor) - return %0 : tensor +module @payload attributes { transform.target_tag = "payload" } { + func.func @test_unknown_op_failure() -> (tensor) { + // expected-error @+1 {{op was not bufferized}} + %0 = "test.dummy_op"() : () -> (tensor) + return %0 : tensor + } } // ----- @@ -111,7 +119,7 @@ module attributes {transform.with_named_sequence} { } } -module { +module @payload attributes { transform.target_tag = "payload" } { // CHECK-LABEL: func @test_function( // CHECK-SAME: %[[A:.*]]: tensor func.func @test_function(%A : tensor, %v : vector<4xf32>) -> (tensor) { @@ -146,11 +154,13 @@ module attributes {transform.with_named_sequence} { // CHECK-SAME: %[[A:.*]]: memref<12x9xf32>, // CHECK-SAME: %[[B:.*]]: memref<9x6xf32>, // CHECK-SAME: %[[C:.*]]: memref<12x6xf32>) -> memref<12x6xf32> { -func.func @matmul(%A: tensor<12x9xf32>, %B: tensor<9x6xf32>, %C: tensor<12x6xf32>) -> tensor<12x6xf32> { - // CHECK: linalg.matmul ins(%[[A]], %[[B]] : memref<12x9xf32>, memref<9x6xf32>) outs(%[[C]] : memref<12x6xf32>) - %D = linalg.matmul ins(%A, %B: tensor<12x9xf32>, tensor<9x6xf32>) outs(%C: tensor<12x6xf32>) -> tensor<12x6xf32> - // CHECK: return %[[C]] : memref<12x6xf32> - return %D : tensor<12x6xf32> +module @payload attributes { transform.target_tag = "payload" } { + func.func @matmul(%A: tensor<12x9xf32>, %B: tensor<9x6xf32>, %C: tensor<12x6xf32>) -> tensor<12x6xf32> { + // CHECK: linalg.matmul ins(%[[A]], %[[B]] : memref<12x9xf32>, memref<9x6xf32>) outs(%[[C]] : memref<12x6xf32>) + %D = linalg.matmul ins(%A, %B: tensor<12x9xf32>, tensor<9x6xf32>) outs(%C: tensor<12x6xf32>) -> tensor<12x6xf32> + // CHECK: return %[[C]] : memref<12x6xf32> + return %D : tensor<12x6xf32> + } } // ----- @@ -165,10 +175,12 @@ module attributes {transform.with_named_sequence} { } // Expect `bufferization.empty_tensor_to_alloc_tensor` to replace the tensor.empty. 
-func.func @empty_to_tensor_alloc() -> tensor<2x2xf32> { - // CHECK: bufferization.alloc_tensor - %0 = tensor.empty() : tensor<2x2xf32> - return %0 : tensor<2x2xf32> +module @payload attributes { transform.target_tag = "payload" } { + func.func @empty_to_tensor_alloc() -> tensor<2x2xf32> { + // CHECK: bufferization.alloc_tensor + %0 = tensor.empty() : tensor<2x2xf32> + return %0 : tensor<2x2xf32> + } } // ----- @@ -185,13 +197,15 @@ module attributes {transform.with_named_sequence} { // CHECK: tensor.extract_slice // CHECK: linalg.fill // CHECK: tensor.insert_slice -func.func @empty_tensor_elimination( - %t: tensor<10xf32>, %f: f32) -> tensor<10xf32> { - %0 = tensor.empty() : tensor<5xf32> - %1 = linalg.fill ins(%f : f32) outs(%0 : tensor<5xf32>) -> tensor<5xf32> - %2 = tensor.insert_slice %1 into %t [1][5][1] - : tensor<5xf32> into tensor<10xf32> - return %2 : tensor<10xf32> +module @payload attributes { transform.target_tag = "payload" } { + func.func @empty_tensor_elimination( + %t: tensor<10xf32>, %f: f32) -> tensor<10xf32> { + %0 = tensor.empty() : tensor<5xf32> + %1 = linalg.fill ins(%f : f32) outs(%0 : tensor<5xf32>) -> tensor<5xf32> + %2 = tensor.insert_slice %1 into %t [1][5][1] + : tensor<5xf32> into tensor<10xf32> + return %2 : tensor<10xf32> + } } // ----- @@ -208,12 +222,14 @@ module attributes {transform.with_named_sequence} { // CHECK: memref.alloca // CHECK: scf.for // CHECK: memref.store -func.func @buffer_loop_hoisting(%lb: index, %ub: index, %step: index, %f: f32, %pos: index) { - scf.for %iv = %lb to %ub step %step { - %0 = memref.alloca() : memref<5xf32> - memref.store %f, %0[%pos] : memref<5xf32> +module @payload attributes { transform.target_tag = "payload" } { + func.func @buffer_loop_hoisting(%lb: index, %ub: index, %step: index, %f: f32, %pos: index) { + scf.for %iv = %lb to %ub step %step { + %0 = memref.alloca() : memref<5xf32> + memref.store %f, %0[%pos] : memref<5xf32> + } + return } - return } // ----- @@ -231,10 +247,12 @@ module attributes {transform.with_named_sequence} { // Expect `bufferization.bufferize_to_allocation` to create an alloc. 
// CHECK-LABEL: func.func @empty_to_tensor_alloc() -func.func @empty_to_tensor_alloc() -> tensor<2x2xf32> { - // CHECK-NEXT: %[[alloca:.*]] = memref.alloca() : memref<2x2xf32> - // CHECK-NEXT: %[[tensor:.*]] = bufferization.to_tensor %[[alloca]] restrict writable : memref<2x2xf32> - // CHECK-NEXT: return %[[tensor]] : tensor<2x2xf32> - %0 = bufferization.alloc_tensor() : tensor<2x2xf32> - return %0 : tensor<2x2xf32> +module @payload attributes { transform.target_tag = "payload" } { + func.func @empty_to_tensor_alloc() -> tensor<2x2xf32> { + // CHECK-NEXT: %[[alloca:.*]] = memref.alloca() : memref<2x2xf32> + // CHECK-NEXT: %[[tensor:.*]] = bufferization.to_tensor %[[alloca]] restrict writable : memref<2x2xf32> + // CHECK-NEXT: return %[[tensor]] : tensor<2x2xf32> + %0 = bufferization.alloc_tensor() : tensor<2x2xf32> + return %0 : tensor<2x2xf32> + } } diff --git a/mlir/test/Dialect/LLVM/transform-e2e.mlir b/mlir/test/Dialect/LLVM/transform-e2e.mlir index c00b47fb936e9..3e637a3ec49a4 100644 --- a/mlir/test/Dialect/LLVM/transform-e2e.mlir +++ b/mlir/test/Dialect/LLVM/transform-e2e.mlir @@ -1,15 +1,17 @@ -// RUN: mlir-opt %s --transform-interpreter -test-transform-dialect-erase-schedule --test-lower-to-llvm --split-input-file | FileCheck %s +// RUN: mlir-opt %s --transform-interpreter="debug-payload-root-tag=payload" -test-transform-dialect-erase-schedule --test-lower-to-llvm --split-input-file | FileCheck %s // CHECK-LABEL: llvm.func @matmul_tensors -func.func @matmul_tensors( - %arg0: tensor<2x4xf32>, %arg1: tensor<4x6xf32>, %arg2: tensor<2x6xf32>) - -> tensor<2x6xf32> { -// CHECK-NOT: linalg -// CHECK: llvm.intr.fmuladd{{.*}} - %0 = linalg.matmul ins(%arg0, %arg1: tensor<2x4xf32>, tensor<4x6xf32>) - outs(%arg2: tensor<2x6xf32>) - -> tensor<2x6xf32> - return %0 : tensor<2x6xf32> +module @payload attributes { transform.target_tag = "payload" } { + func.func @matmul_tensors( + %arg0: tensor<2x4xf32>, %arg1: tensor<4x6xf32>, %arg2: tensor<2x6xf32>) + -> tensor<2x6xf32> { + // CHECK-NOT: linalg + // CHECK: llvm.intr.fmuladd{{.*}} + %0 = linalg.matmul ins(%arg0, %arg1: tensor<2x4xf32>, tensor<4x6xf32>) + outs(%arg2: tensor<2x6xf32>) + -> tensor<2x6xf32> + return %0 : tensor<2x6xf32> + } } module attributes {transform.with_named_sequence} { diff --git a/mlir/test/Dialect/Linalg/matmul-shared-memory-padding.mlir b/mlir/test/Dialect/Linalg/matmul-shared-memory-padding.mlir index 3f8d2ea06641e..9c223737750a9 100644 --- a/mlir/test/Dialect/Linalg/matmul-shared-memory-padding.mlir +++ b/mlir/test/Dialect/Linalg/matmul-shared-memory-padding.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt --split-input-file --transform-interpreter %s | FileCheck %s +// RUN: mlir-opt --split-input-file --transform-interpreter="debug-payload-root-tag=payload" %s | FileCheck %s // CHECK-LABEL: func @matmul_divisible // CHECK: scf.forall @@ -24,19 +24,21 @@ // CHECK: scf.forall // CHECK: vector.transfer_read // CHECK: vector.transfer_write -func.func @matmul_divisible(%A: tensor<1024x1024xf32>, - %B: tensor<1024x1024xf32>, - %C: tensor<1024x1024xf32>) - -> tensor<1024x1024xf32> -{ - %cst = arith.constant 0.000000e+00 : f32 - %0 = linalg.fill ins(%cst : f32) - outs(%C : tensor<1024x1024xf32>) +module @payload attributes { transform.target_tag = "payload" } { + func.func @matmul_divisible(%A: tensor<1024x1024xf32>, + %B: tensor<1024x1024xf32>, + %C: tensor<1024x1024xf32>) -> tensor<1024x1024xf32> - %1 = linalg.matmul ins(%A, %B : tensor<1024x1024xf32>, tensor<1024x1024xf32>) - outs(%0 : tensor<1024x1024xf32>) - -> tensor<1024x1024xf32> 
- return %1 : tensor<1024x1024xf32> + { + %cst = arith.constant 0.000000e+00 : f32 + %0 = linalg.fill ins(%cst : f32) + outs(%C : tensor<1024x1024xf32>) + -> tensor<1024x1024xf32> + %1 = linalg.matmul ins(%A, %B : tensor<1024x1024xf32>, tensor<1024x1024xf32>) + outs(%0 : tensor<1024x1024xf32>) + -> tensor<1024x1024xf32> + return %1 : tensor<1024x1024xf32> + } } module attributes {transform.with_named_sequence} { @@ -143,19 +145,21 @@ module attributes {transform.with_named_sequence} { // CHECK: linalg.matmul // CHECK: vector.transfer_read // CHECK: vector.transfer_write +module @payload attributes { transform.target_tag = "payload" } { func.func @matmul_not_divisible(%A: tensor<1023x1023xf32>, - %B: tensor<1023x1023xf32>, - %C: tensor<1023x1023xf32>) - -> tensor<1023x1023xf32> -{ - %cst = arith.constant 0.000000e+00 : f32 - %0 = linalg.fill ins(%cst : f32) - outs(%C : tensor<1023x1023xf32>) + %B: tensor<1023x1023xf32>, + %C: tensor<1023x1023xf32>) -> tensor<1023x1023xf32> - %1 = linalg.matmul ins(%A, %B : tensor<1023x1023xf32>, tensor<1023x1023xf32>) - outs(%0 : tensor<1023x1023xf32>) - -> tensor<1023x1023xf32> - return %1 : tensor<1023x1023xf32> + { + %cst = arith.constant 0.000000e+00 : f32 + %0 = linalg.fill ins(%cst : f32) + outs(%C : tensor<1023x1023xf32>) + -> tensor<1023x1023xf32> + %1 = linalg.matmul ins(%A, %B : tensor<1023x1023xf32>, tensor<1023x1023xf32>) + outs(%0 : tensor<1023x1023xf32>) + -> tensor<1023x1023xf32> + return %1 : tensor<1023x1023xf32> + } } module attributes {transform.with_named_sequence} { diff --git a/mlir/test/Dialect/Linalg/pad-to-specific-memory-space.mlir b/mlir/test/Dialect/Linalg/pad-to-specific-memory-space.mlir index f2e9e839b7c46..5e5657980ba12 100644 --- a/mlir/test/Dialect/Linalg/pad-to-specific-memory-space.mlir +++ b/mlir/test/Dialect/Linalg/pad-to-specific-memory-space.mlir @@ -1,5 +1,5 @@ -// RUN: mlir-opt --transform-interpreter -cse -canonicalize -split-input-file -verify-diagnostics %s | FileCheck %s +// RUN: mlir-opt --transform-interpreter="debug-payload-root-tag=payload" -cse -canonicalize -split-input-file -verify-diagnostics %s | FileCheck %s #map = affine_map<()[s0] -> (-s0 + 12, 7)> @@ -7,43 +7,45 @@ // CHECK-SAME: %[[arg0:.*]]: memref<24x12xf32, strided<[?, ?], offset: ?>>, // CHECK-SAME: %[[arg1:.*]]: memref<12x25xf32, strided<[?, ?], offset: ?>>, // CHECK-SAME: %[[arg2:.*]]: memref<24x25xf32, strided<[?, ?], offset: ?>>, -func.func @pad_to_memory_space(%arg0: tensor<24x12xf32>, - %arg1: tensor<12x25xf32>, - %arg2: tensor<24x25xf32>, - %iv0 : index, %iv1 : index, - %iv2 : index) -> tensor<24x25xf32> { - %0 = affine.min #map()[%iv2] - - // CHECK: %[[s0:.*]] = memref.subview %[[arg0]] - %1 = tensor.extract_slice %arg0[%iv0, %iv2] [4, %0] [1, 1] : tensor<24x12xf32> to tensor<4x?xf32> - // CHECK: %[[s1:.*]] = memref.subview %[[arg1]] - %2 = tensor.extract_slice %arg1[%iv2, %iv1] [%0, 5] [1, 1] : tensor<12x25xf32> to tensor - // CHECK: %[[s2:.*]] = memref.subview %[[arg2]] - %3 = tensor.extract_slice %arg2[%iv0, %iv1] [4, 5] [1, 1] : tensor<24x25xf32> to tensor<4x5xf32> - - // CHECK: %[[alloc0:.*]] = memref.alloc() : memref<4x7xf32, 3> - // CHECK: linalg.fill {{.*}} outs(%[[alloc0]] - // CHECK: %[[alloc0_view:.*]] = memref.subview %[[alloc0]][0, 0] [4, %{{.*}}] [1, 1] - // CHECK: memref.copy %[[s0]], %[[alloc0_view]] - - // CHECK: %[[alloc1:.*]] = memref.alloc() : memref<7x5xf32, 3> - // CHECK: linalg.fill {{.*}} outs(%[[alloc1]] - // CHECK: %[[alloc1_view:.*]] = memref.subview %[[alloc1]][0, 0] [%{{.*}}, 5] [1, 1] - // CHECK: 
memref.copy %[[s1]], %[[alloc1_view]] - - // CHECK: %[[alloc2:.*]] = memref.alloc() : memref<4x5xf32, 3> - // CHECK-NOT: linalg.fill {{.*}} outs(%[[alloc2]] - // No subview because there is 0 padding - // CHECK: memref.copy %[[s2]], %[[alloc2]] - - // CHECK: linalg.matmul ins(%[[alloc0]], %[[alloc1]] : {{.*}}) outs(%[[alloc2]] : {{.*}}) - // Copy back result. - // CHECK: memref.copy %[[alloc2]], %[[s2]] - %4 = linalg.matmul ins(%1, %2 : tensor<4x?xf32>, tensor) outs(%3 : tensor<4x5xf32>) -> tensor<4x5xf32> - - // insert_slice bufferizes to a no-op. - %5 = tensor.insert_slice %4 into %arg2[%iv0, %iv1] [4, 5] [1, 1] : tensor<4x5xf32> into tensor<24x25xf32> - func.return %5 : tensor<24x25xf32> +module @payload attributes { transform.target_tag = "payload" } { + func.func @pad_to_memory_space(%arg0: tensor<24x12xf32>, + %arg1: tensor<12x25xf32>, + %arg2: tensor<24x25xf32>, + %iv0 : index, %iv1 : index, + %iv2 : index) -> tensor<24x25xf32> { + %0 = affine.min #map()[%iv2] + + // CHECK: %[[s0:.*]] = memref.subview %[[arg0]] + %1 = tensor.extract_slice %arg0[%iv0, %iv2] [4, %0] [1, 1] : tensor<24x12xf32> to tensor<4x?xf32> + // CHECK: %[[s1:.*]] = memref.subview %[[arg1]] + %2 = tensor.extract_slice %arg1[%iv2, %iv1] [%0, 5] [1, 1] : tensor<12x25xf32> to tensor + // CHECK: %[[s2:.*]] = memref.subview %[[arg2]] + %3 = tensor.extract_slice %arg2[%iv0, %iv1] [4, 5] [1, 1] : tensor<24x25xf32> to tensor<4x5xf32> + + // CHECK: %[[alloc0:.*]] = memref.alloc() : memref<4x7xf32, 3> + // CHECK: linalg.fill {{.*}} outs(%[[alloc0]] + // CHECK: %[[alloc0_view:.*]] = memref.subview %[[alloc0]][0, 0] [4, %{{.*}}] [1, 1] + // CHECK: memref.copy %[[s0]], %[[alloc0_view]] + + // CHECK: %[[alloc1:.*]] = memref.alloc() : memref<7x5xf32, 3> + // CHECK: linalg.fill {{.*}} outs(%[[alloc1]] + // CHECK: %[[alloc1_view:.*]] = memref.subview %[[alloc1]][0, 0] [%{{.*}}, 5] [1, 1] + // CHECK: memref.copy %[[s1]], %[[alloc1_view]] + + // CHECK: %[[alloc2:.*]] = memref.alloc() : memref<4x5xf32, 3> + // CHECK-NOT: linalg.fill {{.*}} outs(%[[alloc2]] + // No subview because there is 0 padding + // CHECK: memref.copy %[[s2]], %[[alloc2]] + + // CHECK: linalg.matmul ins(%[[alloc0]], %[[alloc1]] : {{.*}}) outs(%[[alloc2]] : {{.*}}) + // Copy back result. + // CHECK: memref.copy %[[alloc2]], %[[s2]] + %4 = linalg.matmul ins(%1, %2 : tensor<4x?xf32>, tensor) outs(%3 : tensor<4x5xf32>) -> tensor<4x5xf32> + + // insert_slice bufferizes to a no-op. 
+ %5 = tensor.insert_slice %4 into %arg2[%iv0, %iv1] [4, 5] [1, 1] : tensor<4x5xf32> into tensor<24x25xf32> + func.return %5 : tensor<24x25xf32> + } } module attributes {transform.with_named_sequence} { @@ -69,40 +71,42 @@ module attributes {transform.with_named_sequence} { // CHECK-SAME: %[[arg0:.*]]: memref<24x12xf32, strided<[?, ?], offset: ?>>, // CHECK-SAME: %[[arg1:.*]]: memref<12x25xf32, strided<[?, ?], offset: ?>>, // CHECK-SAME: %[[arg2:.*]]: memref<24x25xf32, strided<[?, ?], offset: ?>>, -func.func @vectorize_and_bufferize_pad(%arg0: tensor<24x12xf32>, - %arg1: tensor<12x25xf32>, - %arg2: tensor<24x25xf32>, - %iv0 : index, %iv1 : index, - %iv2 : index) -> tensor<24x25xf32> { - %0 = affine.min #map()[%iv2] - - // CHECK: %[[s0:.*]] = memref.subview %[[arg0]] - %1 = tensor.extract_slice %arg0[%iv0, %iv2] [4, %0] [1, 1] : tensor<24x12xf32> to tensor<4x?xf32> - // CHECK: %[[s1:.*]] = memref.subview %[[arg1]] - %2 = tensor.extract_slice %arg1[%iv2, %iv1] [%0, 5] [1, 1] : tensor<12x25xf32> to tensor - // CHECK: %[[s2:.*]] = memref.subview %[[arg2]] - %3 = tensor.extract_slice %arg2[%iv0, %iv1] [4, 5] [1, 1] : tensor<24x25xf32> to tensor<4x5xf32> - - // CHECK: %[[v0:.*]] = vector.mask {{.*}} { vector.transfer_read %[[s0]] - // CHECK: %[[alloc0:.*]] = memref.alloc() : memref<4x7xf32, 3> - // CHECK: vector.mask {{.*}} { vector.transfer_write %[[v0]], %[[alloc0]] - - // CHECK: %[[v1:.*]] = vector.mask {{.*}} { vector.transfer_read %[[s1]] - // CHECK: %[[alloc1:.*]] = memref.alloc() : memref<7x5xf32, 3> - // CHECK: vector.mask {{.*}} { vector.transfer_write %[[v1]], %[[alloc1]] - - // CHECK: %[[v2:.*]] = vector.mask {{.*}} { vector.transfer_read %[[s2]] - // CHECK: %[[alloc2:.*]] = memref.alloc() : memref<4x5xf32, 3> - // CHECK: vector.mask {{.*}} { vector.transfer_write %[[v2]], %[[alloc0]] - - // CHECK: linalg.matmul ins(%[[alloc0]], %[[alloc1]] : {{.*}}) outs(%[[alloc2]] : {{.*}}) - // Copy back result. - // CHECK: memref.copy %[[alloc2]], %[[s2]] - %4 = linalg.matmul ins(%1, %2 : tensor<4x?xf32>, tensor) outs(%3 : tensor<4x5xf32>) -> tensor<4x5xf32> - - // insert_slice bufferizes to a no-op. 
- %5 = tensor.insert_slice %4 into %arg2[%iv0, %iv1] [4, 5] [1, 1] : tensor<4x5xf32> into tensor<24x25xf32> - func.return %5 : tensor<24x25xf32> +module @payload attributes { transform.target_tag = "payload" } { + func.func @vectorize_and_bufferize_pad(%arg0: tensor<24x12xf32>, + %arg1: tensor<12x25xf32>, + %arg2: tensor<24x25xf32>, + %iv0 : index, %iv1 : index, + %iv2 : index) -> tensor<24x25xf32> { + %0 = affine.min #map()[%iv2] + + // CHECK: %[[s0:.*]] = memref.subview %[[arg0]] + %1 = tensor.extract_slice %arg0[%iv0, %iv2] [4, %0] [1, 1] : tensor<24x12xf32> to tensor<4x?xf32> + // CHECK: %[[s1:.*]] = memref.subview %[[arg1]] + %2 = tensor.extract_slice %arg1[%iv2, %iv1] [%0, 5] [1, 1] : tensor<12x25xf32> to tensor + // CHECK: %[[s2:.*]] = memref.subview %[[arg2]] + %3 = tensor.extract_slice %arg2[%iv0, %iv1] [4, 5] [1, 1] : tensor<24x25xf32> to tensor<4x5xf32> + + // CHECK: %[[v0:.*]] = vector.mask {{.*}} { vector.transfer_read %[[s0]] + // CHECK: %[[alloc0:.*]] = memref.alloc() : memref<4x7xf32, 3> + // CHECK: vector.mask {{.*}} { vector.transfer_write %[[v0]], %[[alloc0]] + + // CHECK: %[[v1:.*]] = vector.mask {{.*}} { vector.transfer_read %[[s1]] + // CHECK: %[[alloc1:.*]] = memref.alloc() : memref<7x5xf32, 3> + // CHECK: vector.mask {{.*}} { vector.transfer_write %[[v1]], %[[alloc1]] + + // CHECK: %[[v2:.*]] = vector.mask {{.*}} { vector.transfer_read %[[s2]] + // CHECK: %[[alloc2:.*]] = memref.alloc() : memref<4x5xf32, 3> + // CHECK: vector.mask {{.*}} { vector.transfer_write %[[v2]], %[[alloc0]] + + // CHECK: linalg.matmul ins(%[[alloc0]], %[[alloc1]] : {{.*}}) outs(%[[alloc2]] : {{.*}}) + // Copy back result. + // CHECK: memref.copy %[[alloc2]], %[[s2]] + %4 = linalg.matmul ins(%1, %2 : tensor<4x?xf32>, tensor) outs(%3 : tensor<4x5xf32>) -> tensor<4x5xf32> + + // insert_slice bufferizes to a no-op. 
+ %5 = tensor.insert_slice %4 into %arg2[%iv0, %iv1] [4, 5] [1, 1] : tensor<4x5xf32> into tensor<24x25xf32> + func.return %5 : tensor<24x25xf32> + } } module attributes {transform.with_named_sequence} { diff --git a/mlir/test/Dialect/Vector/transform-vector.mlir b/mlir/test/Dialect/Vector/transform-vector.mlir index 4b38db79bff3e..0439844dc66ca 100644 --- a/mlir/test/Dialect/Vector/transform-vector.mlir +++ b/mlir/test/Dialect/Vector/transform-vector.mlir @@ -1,16 +1,18 @@ -// RUN: mlir-opt %s --transform-interpreter --split-input-file | FileCheck %s +// RUN: mlir-opt --transform-interpreter="debug-payload-root-tag=payload" %s --split-input-file | FileCheck %s // CHECK-LABEL: func @matmul_tensors -func.func @matmul_tensors( - %arg0: tensor<8x16xf32>, %arg1: tensor<16x32xf32>, %arg2: tensor<8x32xf32>) - -> tensor<8x32xf32> { -// CHECK-NOT: linalg -// CHECK: vector.extract {{.*}} : vector<4xf32> from vector<8x4xf32> -// CHECK: vector.store {{.*}} : memref<8x32xf32>, vector<4xf32> - %0 = linalg.matmul ins(%arg0, %arg1: tensor<8x16xf32>, tensor<16x32xf32>) - outs(%arg2: tensor<8x32xf32>) - -> tensor<8x32xf32> - return %0 : tensor<8x32xf32> +module @payload attributes { transform.target_tag = "payload" } { + func.func @matmul_tensors( + %arg0: tensor<8x16xf32>, %arg1: tensor<16x32xf32>, %arg2: tensor<8x32xf32>) + -> tensor<8x32xf32> { + // CHECK-NOT: linalg + // CHECK: vector.extract {{.*}} : vector<4xf32> from vector<8x4xf32> + // CHECK: vector.store {{.*}} : memref<8x32xf32>, vector<4xf32> + %0 = linalg.matmul ins(%arg0, %arg1: tensor<8x16xf32>, tensor<16x32xf32>) + outs(%arg2: tensor<8x32xf32>) + -> tensor<8x32xf32> + return %0 : tensor<8x32xf32> + } } module attributes {transform.with_named_sequence} { @@ -76,11 +78,13 @@ module attributes {transform.with_named_sequence} { // CHECK-SAME: iterator_types = ["parallel", "parallel", "reduction"], kind = #vector.kind} // CHECK-SAME: %[[ARG0]], %[[ARG1]], %[[ARG2]] : vector<64x64xf16>, vector<64x64xf16> into vector<64x64xf32> // CHECK-NEXT: return %[[R]] : vector<64x64xf32> -func.func @fold_arith_extf_into_contract(%arg0: vector<64x64xf16>, %arg1: vector<64x64xf16>, %arg2: vector<64x64xf32>) -> vector<64x64xf32> { - %lhs_f32 = arith.extf %arg0 : vector<64x64xf16> to vector<64x64xf32> - %rhs_f32 = arith.extf %arg1 : vector<64x64xf16> to vector<64x64xf32> - %result = vector.contract {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>], iterator_types = ["parallel", "parallel", "reduction"], kind = #vector.kind} %lhs_f32, %rhs_f32, %arg2 : vector<64x64xf32>, vector<64x64xf32> into vector<64x64xf32> - return %result : vector<64x64xf32> +module @payload attributes { transform.target_tag = "payload" } { + func.func @fold_arith_extf_into_contract(%arg0: vector<64x64xf16>, %arg1: vector<64x64xf16>, %arg2: vector<64x64xf32>) -> vector<64x64xf32> { + %lhs_f32 = arith.extf %arg0 : vector<64x64xf16> to vector<64x64xf32> + %rhs_f32 = arith.extf %arg1 : vector<64x64xf16> to vector<64x64xf32> + %result = vector.contract {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>], iterator_types = ["parallel", "parallel", "reduction"], kind = #vector.kind} %lhs_f32, %rhs_f32, %arg2 : vector<64x64xf32>, vector<64x64xf32> into vector<64x64xf32> + return %result : vector<64x64xf32> + } } module attributes {transform.with_named_sequence} { @@ -95,30 +99,32 @@ module attributes {transform.with_named_sequence} { // 
----- -// CHECK-LABEL: func.func @arith_to_outerproduct_scalable_i32 -// CHECK-SAME: %[[LHS:.*]]: vector<[4]xi32>, -// CHECK-SAME: %[[RHS:.*]]: vector<[4]xi32>) -> vector<[4]x[4]xi32> { -// CHECK: %[[RES:.*]] = vector.outerproduct %[[LHS]], %[[RHS]] : vector<[4]xi32>, vector<[4]xi32> -// CHECK: return %[[RES]] : vector<[4]x[4]xi32> -func.func @arith_to_outerproduct_scalable_i32(%lhs: vector<[4]xi32>, %rhs: vector<[4]xi32>) -> vector<[4]x[4]xi32> { - %lhsBcast = vector.broadcast %lhs : vector<[4]xi32> to vector<[4]x[4]xi32> - %lhsT = vector.transpose %lhsBcast, [1, 0] : vector<[4]x[4]xi32> to vector<[4]x[4]xi32> - %rhsBcast = vector.broadcast %rhs : vector<[4]xi32> to vector<[4]x[4]xi32> - %mul = arith.muli %lhsT, %rhsBcast : vector<[4]x[4]xi32> - return %mul: vector<[4]x[4]xi32> -} +module @payload attributes { transform.target_tag = "payload" } { + // CHECK-LABEL: func.func @arith_to_outerproduct_scalable_i32 + // CHECK-SAME: %[[LHS:.*]]: vector<[4]xi32>, + // CHECK-SAME: %[[RHS:.*]]: vector<[4]xi32>) -> vector<[4]x[4]xi32> { + // CHECK: %[[RES:.*]] = vector.outerproduct %[[LHS]], %[[RHS]] : vector<[4]xi32>, vector<[4]xi32> + // CHECK: return %[[RES]] : vector<[4]x[4]xi32> + func.func @arith_to_outerproduct_scalable_i32(%lhs: vector<[4]xi32>, %rhs: vector<[4]xi32>) -> vector<[4]x[4]xi32> { + %lhsBcast = vector.broadcast %lhs : vector<[4]xi32> to vector<[4]x[4]xi32> + %lhsT = vector.transpose %lhsBcast, [1, 0] : vector<[4]x[4]xi32> to vector<[4]x[4]xi32> + %rhsBcast = vector.broadcast %rhs : vector<[4]xi32> to vector<[4]x[4]xi32> + %mul = arith.muli %lhsT, %rhsBcast : vector<[4]x[4]xi32> + return %mul: vector<[4]x[4]xi32> + } -// CHECK-LABEL: func.func @arith_to_outerproduct_trans_rhs_f32 -// CHECK-SAME: %[[LHS:.*]]: vector<16xf32>, -// CHECK-SAME: %[[RHS:.*]]: vector<8xf32>) -> vector<8x16xf32> { -// CHECK: %[[RES:.*]] = vector.outerproduct %[[RHS]], %[[LHS]] : vector<8xf32>, vector<16xf32> -// CHECK: return %[[RES]] : vector<8x16xf32> -func.func @arith_to_outerproduct_trans_rhs_f32(%lhs: vector<16xf32>, %rhs: vector<8xf32>) -> vector<8x16xf32> { - %rhsBcast = vector.broadcast %rhs : vector<8xf32> to vector<16x8xf32> - %rhsT = vector.transpose %rhsBcast, [1, 0] : vector<16x8xf32> to vector<8x16xf32> - %lhsBcast = vector.broadcast %lhs : vector<16xf32> to vector<8x16xf32> - %mul = arith.mulf %lhsBcast, %rhsT : vector<8x16xf32> - return %mul: vector<8x16xf32> + // CHECK-LABEL: func.func @arith_to_outerproduct_trans_rhs_f32 + // CHECK-SAME: %[[LHS:.*]]: vector<16xf32>, + // CHECK-SAME: %[[RHS:.*]]: vector<8xf32>) -> vector<8x16xf32> { + // CHECK: %[[RES:.*]] = vector.outerproduct %[[RHS]], %[[LHS]] : vector<8xf32>, vector<16xf32> + // CHECK: return %[[RES]] : vector<8x16xf32> + func.func @arith_to_outerproduct_trans_rhs_f32(%lhs: vector<16xf32>, %rhs: vector<8xf32>) -> vector<8x16xf32> { + %rhsBcast = vector.broadcast %rhs : vector<8xf32> to vector<16x8xf32> + %rhsT = vector.transpose %rhsBcast, [1, 0] : vector<16x8xf32> to vector<8x16xf32> + %lhsBcast = vector.broadcast %lhs : vector<16xf32> to vector<8x16xf32> + %mul = arith.mulf %lhsBcast, %rhsT : vector<8x16xf32> + return %mul: vector<8x16xf32> + } } module attributes {transform.with_named_sequence} { diff --git a/mlir/test/Examples/transform/ChH/full.mlir b/mlir/test/Examples/transform/ChH/full.mlir index 259475ebdbf49..85dbf67023323 100644 --- a/mlir/test/Examples/transform/ChH/full.mlir +++ b/mlir/test/Examples/transform/ChH/full.mlir @@ -1,8 +1,6 @@ -// RUN: mlir-opt %s --transform-interpreter \ -// RUN: 
--test-transform-dialect-erase-schedule \
-// RUN: --math-uplift-to-fma \
-// RUN: --convert-bufferization-to-memref \
-// RUN: --test-lower-to-llvm |\
+// RUN: mlir-opt %s --transform-interpreter="debug-payload-root-tag=payload" \
+// RUN: --test-transform-dialect-erase-schedule |\
+// RUN: mlir-opt -pass-pipeline='builtin.module(builtin.module(math-uplift-to-fma,convert-bufferization-to-memref,test-lower-to-llvm))' - |\
 // RUN: FileCheck %s
 
 // Fixed-size tensor types to be used in convolution.
@@ -19,6 +17,7 @@
 // tensors annotated with attributes from the `bufferization` dialect. These
 // attributes hint the bufferization pass to assume buffers can be directly
 // used for these tensors without reshaping.
+module @payload attributes { transform.target_tag = "payload" } {
 func.func @conv(
     %input: !tinput {bufferization.writable = false,
                      bufferization.access = "read",
@@ -84,7 +83,7 @@ func.func @conv(
   return %relued : !toutput
 }
-
+}
 
 // Module containing the transformation script to be applied. The attribute
 // is required to correctly verify the use of named (macro-like) sequences.
 module attributes { transform.with_named_sequence } {

From 470e5afe6969530f39eb46c129bfb52b2729ed37 Mon Sep 17 00:00:00 2001
From: Keith Smiley
Date: Tue, 24 Sep 2024 22:35:50 -0700
Subject: [PATCH 092/212] [bazel] Port f586b1e3f42788025aa6f55be70c5e361cc8b529 (#109908)

---
 utils/bazel/llvm-project-overlay/mlir/BUILD.bazel | 1 +
 1 file changed, 1 insertion(+)

diff --git a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel
index 304bee99b323f..f5437245e8e13 100644
--- a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel
@@ -12974,6 +12974,7 @@ cc_library(
         ":ControlFlowInterfaces",
         ":ConvertToLLVMInterface",
         ":DestinationStyleOpInterface",
+        ":FunctionInterfaces",
         ":IR",
         ":InferIntRangeCommon",
         ":InferIntRangeInterface",

From e1365ce2220dade4e771881dcde9a7e0511ce451 Mon Sep 17 00:00:00 2001
From: Timm Baeder
Date: Wed, 25 Sep 2024 07:44:05 +0200
Subject: [PATCH 093/212] [clang][bytecode][NFC] Add type assertions to ArrayElem{,Pop} (#109829)

So we don't accidentally try to use those with the wrong type.
---
 clang/lib/AST/ByteCode/Interp.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/clang/lib/AST/ByteCode/Interp.h b/clang/lib/AST/ByteCode/Interp.h
index 1f4c302b26197..b029399a1554b 100644
--- a/clang/lib/AST/ByteCode/Interp.h
+++ b/clang/lib/AST/ByteCode/Interp.h
@@ -2573,6 +2573,7 @@ inline bool ArrayElem(InterpState &S, CodePtr OpPC, uint32_t Index) {
   if (!CheckLoad(S, OpPC, Ptr))
     return false;
 
+  assert(Ptr.atIndex(Index).getFieldDesc()->getPrimType() == Name);
   S.Stk.push<T>(Ptr.atIndex(Index).deref<T>());
   return true;
 }
@@ -2584,6 +2585,7 @@ inline bool ArrayElemPop(InterpState &S, CodePtr OpPC, uint32_t Index) {
   if (!CheckLoad(S, OpPC, Ptr))
     return false;
 
+  assert(Ptr.atIndex(Index).getFieldDesc()->getPrimType() == Name);
   S.Stk.push<T>(Ptr.atIndex(Index).deref<T>());
   return true;
 }

From 915fe84c6d78f3c0268f3de571eb323304089d23 Mon Sep 17 00:00:00 2001
From: Kazu Hirata
Date: Tue, 24 Sep 2024 22:53:16 -0700
Subject: [PATCH 094/212] [ctx_prof] Simplify ingestContext (NFC) (#109902)

try_emplace can default-construct the value, so:

  try_emplace(CSId, CallTargetMapTy())
  try_emplace(CSId)

are equivalent to each other. We can further simplify the function using
the fact that Map.try_emplace(Key).first->second is the same as Map[Key].
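For reference, a minimal standalone sketch of the two DenseMap identities this
message relies on (not part of the patch; the `demo` function and the map type
aliases below are hypothetical stand-ins for the reader's real types):

  #include "llvm/ADT/DenseMap.h"
  #include <cstdint>

  // Hypothetical stand-ins, for illustration only.
  using CallTargetMapTy = llvm::DenseMap<uint64_t, int>;
  using CallsiteMapTy = llvm::DenseMap<uint32_t, CallTargetMapTy>;

  void demo(CallsiteMapTy &Callsites, uint32_t CSId, uint64_t Guid) {
    // (1) Explicitly default-constructing the value is redundant:
    // try_emplace with no value arguments value-initializes the mapped type.
    auto [It1, Inserted1] = Callsites.try_emplace(CSId, CallTargetMapTy());
    auto [It2, Inserted2] = Callsites.try_emplace(CSId); // same effect
    // (2) try_emplace(Key).first->second is just operator[]: both yield a
    // reference to the mapped value, default-constructing it if absent.
    CallTargetMapTy &Targets = Callsites[CSId];
    Targets.try_emplace(Guid, 0);
    (void)It1; (void)Inserted1; (void)It2; (void)Inserted2;
  }

The change below applies identity (1) followed by (2), collapsing the
two-statement body into a single map lookup.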
---
 llvm/include/llvm/ProfileData/PGOCtxProfReader.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/llvm/include/llvm/ProfileData/PGOCtxProfReader.h b/llvm/include/llvm/ProfileData/PGOCtxProfReader.h
index beda07d7b8286..a00c21ddc7d7a 100644
--- a/llvm/include/llvm/ProfileData/PGOCtxProfReader.h
+++ b/llvm/include/llvm/ProfileData/PGOCtxProfReader.h
@@ -68,8 +68,7 @@ class PGOCtxProfContext final {
   CallsiteMapTy &callsites() { return Callsites; }
 
   void ingestContext(uint32_t CSId, PGOCtxProfContext &&Other) {
-    auto [Iter, _] = callsites().try_emplace(CSId, CallTargetMapTy());
-    Iter->second.emplace(Other.guid(), std::move(Other));
+    callsites()[CSId].emplace(Other.guid(), std::move(Other));
   }
 
   void ingestAllContexts(uint32_t CSId, CallTargetMapTy &&Other) {

From fa824dc0dd960214865b03d8f56b18bb93e4a88b Mon Sep 17 00:00:00 2001
From: Yingwei Zheng
Date: Wed, 25 Sep 2024 13:58:23 +0800
Subject: [PATCH 095/212] [LLVM][IR] Add constant range support for floating-point types (#86483)

This patch adds basic constant range support for floating-point types to
enable range-based optimizations.
---
 llvm/include/llvm/ADT/APFloat.h           |   5 +
 llvm/include/llvm/IR/ConstantFPRange.h    | 204 ++++++++++
 llvm/lib/IR/CMakeLists.txt                |   1 +
 llvm/lib/IR/ConstantFPRange.cpp           | 249 +++++++++++++
 llvm/lib/Support/APFloat.cpp              |   7 +-
 llvm/unittests/IR/CMakeLists.txt          |   1 +
 llvm/unittests/IR/ConstantFPRangeTest.cpp | 433 ++++++++++++++++++++++
 7 files changed, 898 insertions(+), 2 deletions(-)
 create mode 100644 llvm/include/llvm/IR/ConstantFPRange.h
 create mode 100644 llvm/lib/IR/ConstantFPRange.cpp
 create mode 100644 llvm/unittests/IR/ConstantFPRangeTest.cpp

diff --git a/llvm/include/llvm/ADT/APFloat.h b/llvm/include/llvm/ADT/APFloat.h
index 7039e961bff82..9cc8369a0bf52 100644
--- a/llvm/include/llvm/ADT/APFloat.h
+++ b/llvm/include/llvm/ADT/APFloat.h
@@ -1534,6 +1534,11 @@ inline APFloat maximumnum(const APFloat &A, const APFloat &B) {
   return A < B ? B : A;
 }
 
+inline raw_ostream &operator<<(raw_ostream &OS, const APFloat &V) {
+  V.print(OS);
+  return OS;
+}
+
 // We want the following functions to be available in the header for inlining.
 // We cannot define them inline in the class definition of `DoubleAPFloat`
 // because doing so would instantiate `std::unique_ptr` before
diff --git a/llvm/include/llvm/IR/ConstantFPRange.h b/llvm/include/llvm/IR/ConstantFPRange.h
new file mode 100644
index 0000000000000..23f0e8b8e0d13
--- /dev/null
+++ b/llvm/include/llvm/IR/ConstantFPRange.h
@@ -0,0 +1,204 @@
+//===- ConstantFPRange.h - Represent a range for floating-point -*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Represent a range of possible values that may occur when the program is run
+// for a floating-point value. This keeps track of a lower and upper bound for
+// the constant.
+//
+// Range = [Lower, Upper] U (MayBeQNaN ? QNaN : {}) U (MayBeSNaN ? SNaN : {})
+// Specifically, [inf, -inf] represents an empty set.
+// Note:
+// 1. Bounds are inclusive.
+// 2. -0 is considered to be less than 0. That is, range [0, 0] doesn't contain
+// -0.
+// 3. Currently wrapping ranges are not supported.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_IR_CONSTANTFPRANGE_H
+#define LLVM_IR_CONSTANTFPRANGE_H
+
+#include "llvm/ADT/APFloat.h"
+#include "llvm/IR/Instructions.h"
+#include <optional>
+
+namespace llvm {
+
+class raw_ostream;
+struct KnownFPClass;
+
+/// This class represents a range of floating-point values.
+class [[nodiscard]] ConstantFPRange {
+  APFloat Lower, Upper;
+  bool MayBeQNaN : 1;
+  bool MayBeSNaN : 1;
+
+  /// Create empty constant range with same semantics.
+  ConstantFPRange getEmpty() const {
+    return ConstantFPRange(getSemantics(), /*IsFullSet=*/false);
+  }
+
+  /// Create full constant range with same semantics.
+  ConstantFPRange getFull() const {
+    return ConstantFPRange(getSemantics(), /*IsFullSet=*/true);
+  }
+
+  void makeEmpty();
+  void makeFull();
+  bool isNaNOnly() const;
+
+  /// Initialize a full or empty set for the specified semantics.
+  explicit ConstantFPRange(const fltSemantics &Sem, bool IsFullSet);
+
+public:
+  /// Initialize a range to hold the single specified value.
+  explicit ConstantFPRange(const APFloat &Value);
+
+  /// Initialize a range of values explicitly.
+  /// Note: If \p LowerVal is greater than \p UpperVal, please use the canonical
+  /// form [Inf, -Inf].
+  ConstantFPRange(APFloat LowerVal, APFloat UpperVal, bool MayBeQNaN,
+                  bool MayBeSNaN);
+
+  /// Create empty constant range with the given semantics.
+  static ConstantFPRange getEmpty(const fltSemantics &Sem) {
+    return ConstantFPRange(Sem, /*IsFullSet=*/false);
+  }
+
+  /// Create full constant range with the given semantics.
+  static ConstantFPRange getFull(const fltSemantics &Sem) {
+    return ConstantFPRange(Sem, /*IsFullSet=*/true);
+  }
+
+  /// Helper for (-inf, inf) to represent all finite values.
+  static ConstantFPRange getFinite(const fltSemantics &Sem);
+
+  /// Create a range which doesn't contain NaNs.
+  static ConstantFPRange getNonNaN(APFloat LowerVal, APFloat UpperVal) {
+    return ConstantFPRange(std::move(LowerVal), std::move(UpperVal),
+                           /*MayBeQNaN=*/false, /*MayBeSNaN=*/false);
+  }
+
+  /// Create a range which may contain NaNs.
+  static ConstantFPRange getMayBeNaN(APFloat LowerVal, APFloat UpperVal) {
+    return ConstantFPRange(std::move(LowerVal), std::move(UpperVal),
+                           /*MayBeQNaN=*/true, /*MayBeSNaN=*/true);
+  }
+
+  /// Create a range which only contains NaNs.
+  static ConstantFPRange getNaNOnly(const fltSemantics &Sem, bool MayBeQNaN,
+                                    bool MayBeSNaN);
+
+  /// Produce the smallest range such that all values that may satisfy the given
+  /// predicate with any value contained within Other is contained in the
+  /// returned range. Formally, this returns a superset of
+  /// 'union over all y in Other . { x : fcmp op x y is true }'. If the exact
+  /// answer is not representable as a ConstantFPRange, the return value will be
+  /// a proper superset of the above.
+  ///
+  /// Example: Pred = ole and Other = float [2, 5] returns Result = [-inf, 5]
+  static ConstantFPRange makeAllowedFCmpRegion(FCmpInst::Predicate Pred,
+                                               const ConstantFPRange &Other);
+
+  /// Produce the largest range such that all values in the returned range
+  /// satisfy the given predicate with all values contained within Other.
+  /// Formally, this returns a subset of
+  /// 'intersection over all y in Other . { x : fcmp op x y is true }'. If the
+  /// exact answer is not representable as a ConstantFPRange, the return value
+  /// will be a proper subset of the above.
+  ///
+  /// Example: Pred = ole and Other = float [2, 5] returns [-inf, 2]
+  static ConstantFPRange makeSatisfyingFCmpRegion(FCmpInst::Predicate Pred,
+                                                  const ConstantFPRange &Other);
+
+  /// Produce the exact range such that all values in the returned range satisfy
+  /// the given predicate with any value contained within Other. Formally, this
+  /// returns the exact answer when the superset of 'union over all y in Other
+  /// is exactly same as the subset of intersection over all y in Other.
+  /// { x : fcmp op x y is true}'.
+  ///
+  /// Example: Pred = olt and Other = float 3 returns [-inf, 3)
+  static ConstantFPRange makeExactFCmpRegion(FCmpInst::Predicate Pred,
+                                             const APFloat &Other);
+
+  /// Does the predicate \p Pred hold between ranges this and \p Other?
+  /// NOTE: false does not mean that inverse predicate holds!
+  bool fcmp(FCmpInst::Predicate Pred, const ConstantFPRange &Other) const;
+
+  /// Return the lower value for this range.
+  const APFloat &getLower() const { return Lower; }
+
+  /// Return the upper value for this range.
+  const APFloat &getUpper() const { return Upper; }
+
+  bool containsNaN() const { return MayBeQNaN || MayBeSNaN; }
+  bool containsQNaN() const { return MayBeQNaN; }
+  bool containsSNaN() const { return MayBeSNaN; }
+
+  /// Get the semantics of this ConstantFPRange.
+  const fltSemantics &getSemantics() const { return Lower.getSemantics(); }
+
+  /// Return true if this set contains all of the elements possible
+  /// for this data-type.
+  bool isFullSet() const;
+
+  /// Return true if this set contains no members.
+  bool isEmptySet() const;
+
+  /// Return true if the specified value is in the set.
+  bool contains(const APFloat &Val) const;
+
+  /// Return true if the other range is a subset of this one.
+  bool contains(const ConstantFPRange &CR) const;
+
+  /// If this set contains a single element, return it, otherwise return null.
+  const APFloat *getSingleElement() const;
+
+  /// Return true if this set contains exactly one member.
+  bool isSingleElement() const { return getSingleElement() != nullptr; }
+
+  /// Return true if the sign bit of all values in this range is 1.
+  /// Return false if the sign bit of all values in this range is 0.
+  /// Otherwise, return std::nullopt.
+  std::optional<bool> getSignBit() const;
+
+  /// Return true if this range is equal to another range.
+  bool operator==(const ConstantFPRange &CR) const;
+  /// Return true if this range is not equal to another range.
+  bool operator!=(const ConstantFPRange &CR) const { return !operator==(CR); }
+
+  /// Return the FPClassTest which will return true for the value.
+  FPClassTest classify() const;
+
+  /// Return known floating-point classes for values in this range.
+  KnownFPClass toKnownFPClass() const;
+
+  /// Print out the bounds to a stream.
+  void print(raw_ostream &OS) const;
+
+  /// Allow printing from a debugger easily.
+  void dump() const;
+
+  /// Return the range that results from the intersection of this range with
+  /// another range.
+  ConstantFPRange intersectWith(const ConstantFPRange &CR) const;
+
+  /// Return the range that results from the union of this range
+  /// with another range. The resultant range is guaranteed to include the
+  /// elements of both sets, but may contain more.
+  ConstantFPRange unionWith(const ConstantFPRange &CR) const;
+};
+
+inline raw_ostream &operator<<(raw_ostream &OS, const ConstantFPRange &CR) {
+  CR.print(OS);
+  return OS;
+}
+
+} // end namespace llvm
+
+#endif // LLVM_IR_CONSTANTFPRANGE_H
diff --git a/llvm/lib/IR/CMakeLists.txt b/llvm/lib/IR/CMakeLists.txt
index 91e0e0cc65f36..e5756940dd5a0 100644
--- a/llvm/lib/IR/CMakeLists.txt
+++ b/llvm/lib/IR/CMakeLists.txt
@@ -8,6 +8,7 @@ add_llvm_component_library(LLVMCore
   BuiltinGCs.cpp
   Comdat.cpp
   ConstantFold.cpp
+  ConstantFPRange.cpp
   ConstantRange.cpp
   ConstantRangeList.cpp
   Constants.cpp
diff --git a/llvm/lib/IR/ConstantFPRange.cpp b/llvm/lib/IR/ConstantFPRange.cpp
new file mode 100644
index 0000000000000..58aab353b4393
--- /dev/null
+++ b/llvm/lib/IR/ConstantFPRange.cpp
@@ -0,0 +1,249 @@
+//===- ConstantFPRange.cpp - ConstantFPRange implementation ---------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/IR/ConstantFPRange.h"
+#include "llvm/ADT/APFloat.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include <cassert>
+
+using namespace llvm;
+
+void ConstantFPRange::makeEmpty() {
+  auto &Sem = Lower.getSemantics();
+  Lower = APFloat::getInf(Sem, /*Negative=*/false);
+  Upper = APFloat::getInf(Sem, /*Negative=*/true);
+  MayBeQNaN = false;
+  MayBeSNaN = false;
+}
+
+void ConstantFPRange::makeFull() {
+  auto &Sem = Lower.getSemantics();
+  Lower = APFloat::getInf(Sem, /*Negative=*/true);
+  Upper = APFloat::getInf(Sem, /*Negative=*/false);
+  MayBeQNaN = true;
+  MayBeSNaN = true;
+}
+
+bool ConstantFPRange::isNaNOnly() const {
+  return Lower.isPosInfinity() && Upper.isNegInfinity();
+}
+
+ConstantFPRange::ConstantFPRange(const fltSemantics &Sem, bool IsFullSet)
+    : Lower(Sem, APFloat::uninitialized), Upper(Sem, APFloat::uninitialized) {
+  Lower = APFloat::getInf(Sem, /*Negative=*/IsFullSet);
+  Upper = APFloat::getInf(Sem, /*Negative=*/!IsFullSet);
+  MayBeQNaN = IsFullSet;
+  MayBeSNaN = IsFullSet;
+}
+
+ConstantFPRange::ConstantFPRange(const APFloat &Value)
+    : Lower(Value.getSemantics(), APFloat::uninitialized),
+      Upper(Value.getSemantics(), APFloat::uninitialized) {
+  if (Value.isNaN()) {
+    makeEmpty();
+    bool IsSNaN = Value.isSignaling();
+    MayBeQNaN = !IsSNaN;
+    MayBeSNaN = IsSNaN;
+  } else {
+    Lower = Upper = Value;
+    MayBeQNaN = MayBeSNaN = false;
+  }
+}
+
+// We treat that -0 is less than 0 here.
+static APFloat::cmpResult strictCompare(const APFloat &LHS,
+                                        const APFloat &RHS) {
+  assert(!LHS.isNaN() && !RHS.isNaN() && "Unordered compare");
+  if (LHS.isZero() && RHS.isZero()) {
+    if (LHS.isNegative() == RHS.isNegative())
+      return APFloat::cmpEqual;
+    return LHS.isNegative() ?
APFloat::cmpLessThan : APFloat::cmpGreaterThan; + } + return LHS.compare(RHS); +} + +static bool isNonCanonicalEmptySet(const APFloat &Lower, const APFloat &Upper) { + return strictCompare(Lower, Upper) == APFloat::cmpGreaterThan && + !(Lower.isInfinity() && Upper.isInfinity()); +} + +static void canonicalizeRange(APFloat &Lower, APFloat &Upper) { + if (isNonCanonicalEmptySet(Lower, Upper)) { + Lower = APFloat::getInf(Lower.getSemantics(), /*Negative=*/false); + Upper = APFloat::getInf(Upper.getSemantics(), /*Negative=*/true); + } +} + +ConstantFPRange::ConstantFPRange(APFloat LowerVal, APFloat UpperVal, + bool MayBeQNaN, bool MayBeSNaN) + : Lower(std::move(LowerVal)), Upper(std::move(UpperVal)) { + assert(&Lower.getSemantics() == &Upper.getSemantics() && + "Should only use the same semantics"); + assert(!isNonCanonicalEmptySet(Lower, Upper) && "Non-canonical form"); + this->MayBeQNaN = MayBeQNaN; + this->MayBeSNaN = MayBeSNaN; +} + +ConstantFPRange ConstantFPRange::getFinite(const fltSemantics &Sem) { + return ConstantFPRange(APFloat::getLargest(Sem, /*Negative=*/true), + APFloat::getLargest(Sem, /*Negative=*/false), + /*MayBeQNaN=*/false, /*MayBeSNaN=*/false); +} + +ConstantFPRange ConstantFPRange::getNaNOnly(const fltSemantics &Sem, + bool MayBeQNaN, bool MayBeSNaN) { + return ConstantFPRange(APFloat::getInf(Sem, /*Negative=*/false), + APFloat::getInf(Sem, /*Negative=*/true), MayBeQNaN, + MayBeSNaN); +} + +ConstantFPRange +ConstantFPRange::makeAllowedFCmpRegion(FCmpInst::Predicate Pred, + const ConstantFPRange &Other) { + // TODO + return getFull(Other.getSemantics()); +} + +ConstantFPRange +ConstantFPRange::makeSatisfyingFCmpRegion(FCmpInst::Predicate Pred, + const ConstantFPRange &Other) { + // TODO + return getEmpty(Other.getSemantics()); +} + +ConstantFPRange ConstantFPRange::makeExactFCmpRegion(FCmpInst::Predicate Pred, + const APFloat &Other) { + return makeAllowedFCmpRegion(Pred, ConstantFPRange(Other)); +} + +bool ConstantFPRange::fcmp(FCmpInst::Predicate Pred, + const ConstantFPRange &Other) const { + return makeSatisfyingFCmpRegion(Pred, Other).contains(*this); +} + +bool ConstantFPRange::isFullSet() const { + return Lower.isNegInfinity() && Upper.isPosInfinity() && MayBeQNaN && + MayBeSNaN; +} + +bool ConstantFPRange::isEmptySet() const { + return Lower.isPosInfinity() && Upper.isNegInfinity() && !MayBeQNaN && + !MayBeSNaN; +} + +bool ConstantFPRange::contains(const APFloat &Val) const { + assert(&getSemantics() == &Val.getSemantics() && + "Should only use the same semantics"); + + if (Val.isNaN()) + return Val.isSignaling() ? MayBeSNaN : MayBeQNaN; + return strictCompare(Lower, Val) != APFloat::cmpGreaterThan && + strictCompare(Val, Upper) != APFloat::cmpGreaterThan; +} + +bool ConstantFPRange::contains(const ConstantFPRange &CR) const { + assert(&getSemantics() == &CR.getSemantics() && + "Should only use the same semantics"); + + if (CR.MayBeQNaN && !MayBeQNaN) + return false; + + if (CR.MayBeSNaN && !MayBeSNaN) + return false; + + return strictCompare(Lower, CR.Lower) != APFloat::cmpGreaterThan && + strictCompare(CR.Upper, Upper) != APFloat::cmpGreaterThan; +} + +const APFloat *ConstantFPRange::getSingleElement() const { + if (MayBeSNaN || MayBeQNaN) + return nullptr; + return Lower.bitwiseIsEqual(Upper) ? 
&Lower : nullptr;
+}
+
+std::optional<bool> ConstantFPRange::getSignBit() const {
+  if (!MayBeSNaN && !MayBeQNaN && Lower.isNegative() == Upper.isNegative())
+    return Lower.isNegative();
+  return std::nullopt;
+}
+
+bool ConstantFPRange::operator==(const ConstantFPRange &CR) const {
+  if (MayBeSNaN != CR.MayBeSNaN || MayBeQNaN != CR.MayBeQNaN)
+    return false;
+  return Lower.bitwiseIsEqual(CR.Lower) && Upper.bitwiseIsEqual(CR.Upper);
+}
+
+FPClassTest ConstantFPRange::classify() const {
+  uint32_t Mask = fcNone;
+  if (MayBeSNaN)
+    Mask |= fcSNan;
+  if (MayBeQNaN)
+    Mask |= fcQNan;
+  if (!isNaNOnly()) {
+    FPClassTest LowerMask = Lower.classify();
+    FPClassTest UpperMask = Upper.classify();
+    assert(LowerMask <= UpperMask && "Range is nan-only.");
+    for (uint32_t I = LowerMask; I <= UpperMask; I <<= 1)
+      Mask |= I;
+  }
+  return static_cast<FPClassTest>(Mask);
+}
+
+KnownFPClass ConstantFPRange::toKnownFPClass() const {
+  KnownFPClass Result;
+  Result.KnownFPClasses = classify();
+  Result.SignBit = getSignBit();
+  return Result;
+}
+
+void ConstantFPRange::print(raw_ostream &OS) const {
+  if (isFullSet())
+    OS << "full-set";
+  else if (isEmptySet())
+    OS << "empty-set";
+  else {
+    bool NaNOnly = isNaNOnly();
+    if (!NaNOnly)
+      OS << '[' << Lower << ", " << Upper << ']';
+
+    if (MayBeSNaN || MayBeQNaN) {
+      if (!NaNOnly)
+        OS << " with ";
+      if (MayBeSNaN && MayBeQNaN)
+        OS << "NaN";
+      else if (MayBeSNaN)
+        OS << "SNaN";
+      else if (MayBeQNaN)
+        OS << "QNaN";
+    }
+  }
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD void ConstantFPRange::dump() const { print(dbgs()); }
+#endif
+
+ConstantFPRange
+ConstantFPRange::intersectWith(const ConstantFPRange &CR) const {
+  assert(&getSemantics() == &CR.getSemantics() &&
+         "Should only use the same semantics");
+  APFloat NewLower = maxnum(Lower, CR.Lower);
+  APFloat NewUpper = minnum(Upper, CR.Upper);
+  canonicalizeRange(NewLower, NewUpper);
+  return ConstantFPRange(std::move(NewLower), std::move(NewUpper),
+                         MayBeQNaN & CR.MayBeQNaN, MayBeSNaN & CR.MayBeSNaN);
+}
+
+ConstantFPRange ConstantFPRange::unionWith(const ConstantFPRange &CR) const {
+  assert(&getSemantics() == &CR.getSemantics() &&
+         "Should only use the same semantics");
+  return ConstantFPRange(minnum(Lower, CR.Lower), maxnum(Upper, CR.Upper),
+                         MayBeQNaN | CR.MayBeQNaN, MayBeSNaN | CR.MayBeSNaN);
+}
diff --git a/llvm/lib/Support/APFloat.cpp b/llvm/lib/Support/APFloat.cpp
index 7f68c5ab9b7cf..dee917fd56104 100644
--- a/llvm/lib/Support/APFloat.cpp
+++ b/llvm/lib/Support/APFloat.cpp
@@ -5366,11 +5366,14 @@ APFloat APFloat::getAllOnesValue(const fltSemantics &Semantics) {
 void APFloat::print(raw_ostream &OS) const {
   SmallVector<char, 16> Buffer;
   toString(Buffer);
-  OS << Buffer << "\n";
+  OS << Buffer;
 }
 
 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
-LLVM_DUMP_METHOD void APFloat::dump() const { print(dbgs()); }
+LLVM_DUMP_METHOD void APFloat::dump() const {
+  print(dbgs());
+  dbgs() << '\n';
+}
 #endif
 
 void APFloat::Profile(FoldingSetNodeID &NID) const {
diff --git a/llvm/unittests/IR/CMakeLists.txt b/llvm/unittests/IR/CMakeLists.txt
index 633166221c690..e5c8630f3eed7 100644
--- a/llvm/unittests/IR/CMakeLists.txt
+++ b/llvm/unittests/IR/CMakeLists.txt
@@ -16,6 +16,7 @@ add_llvm_unittest(IRTests
   BasicBlockTest.cpp
  BasicBlockDbgInfoTest.cpp
   CFGBuilder.cpp
+  ConstantFPRangeTest.cpp
   ConstantRangeTest.cpp
   ConstantRangeListTest.cpp
   ConstantsTest.cpp
diff --git a/llvm/unittests/IR/ConstantFPRangeTest.cpp b/llvm/unittests/IR/ConstantFPRangeTest.cpp
new file mode 100644
index 0000000000000..dbb6c4545a183
--- /dev/null
+++ b/llvm/unittests/IR/ConstantFPRangeTest.cpp
@@ -0,0 +1,433 @@
+//===- ConstantFPRangeTest.cpp - ConstantFPRange tests --------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/IR/ConstantFPRange.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/Sequence.h"
+#include "llvm/ADT/SmallBitVector.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Operator.h"
+#include "llvm/Support/KnownBits.h"
+#include "gtest/gtest.h"
+
+using namespace llvm;
+
+namespace {
+
+class ConstantFPRangeTest : public ::testing::Test {
+protected:
+  static const fltSemantics &Sem;
+  static ConstantFPRange Full;
+  static ConstantFPRange Empty;
+  static ConstantFPRange Finite;
+  static ConstantFPRange One;
+  static ConstantFPRange PosZero;
+  static ConstantFPRange NegZero;
+  static ConstantFPRange Zero;
+  static ConstantFPRange PosInf;
+  static ConstantFPRange NegInf;
+  static ConstantFPRange Denormal;
+  static ConstantFPRange NaN;
+  static ConstantFPRange SNaN;
+  static ConstantFPRange QNaN;
+  static ConstantFPRange Some;
+  static ConstantFPRange SomePos;
+  static ConstantFPRange SomeNeg;
+};
+
+const fltSemantics &ConstantFPRangeTest::Sem = APFloat::IEEEdouble();
+ConstantFPRange ConstantFPRangeTest::Full =
+    ConstantFPRange::getFull(APFloat::IEEEdouble());
+ConstantFPRange ConstantFPRangeTest::Empty =
+    ConstantFPRange::getEmpty(APFloat::IEEEdouble());
+ConstantFPRange ConstantFPRangeTest::Finite =
+    ConstantFPRange::getFinite(APFloat::IEEEdouble());
+ConstantFPRange ConstantFPRangeTest::One = ConstantFPRange(APFloat(1.0));
+ConstantFPRange ConstantFPRangeTest::PosZero = ConstantFPRange(
+    APFloat::getZero(APFloat::IEEEdouble(), /*Negative=*/false));
+ConstantFPRange ConstantFPRangeTest::NegZero =
+    ConstantFPRange(APFloat::getZero(APFloat::IEEEdouble(), /*Negative=*/true));
+ConstantFPRange ConstantFPRangeTest::Zero = ConstantFPRange::getNonNaN(
+    APFloat::getZero(APFloat::IEEEdouble(), /*Negative=*/true),
+    APFloat::getZero(APFloat::IEEEdouble(), /*Negative=*/false));
+ConstantFPRange ConstantFPRangeTest::Denormal =
+    ConstantFPRange(APFloat::getSmallest(APFloat::IEEEdouble()));
+ConstantFPRange ConstantFPRangeTest::PosInf =
+    ConstantFPRange(APFloat::getInf(APFloat::IEEEdouble(), /*Negative=*/false));
+ConstantFPRange ConstantFPRangeTest::NegInf =
+    ConstantFPRange(APFloat::getInf(APFloat::IEEEdouble(), /*Negative=*/true));
+ConstantFPRange ConstantFPRangeTest::NaN = ConstantFPRange::getNaNOnly(
+    APFloat::IEEEdouble(), /*MayBeQNaN=*/true, /*MayBeSNaN=*/true);
+ConstantFPRange ConstantFPRangeTest::SNaN =
+    ConstantFPRange(APFloat::getSNaN(APFloat::IEEEdouble()));
+ConstantFPRange ConstantFPRangeTest::QNaN =
+    ConstantFPRange(APFloat::getQNaN(APFloat::IEEEdouble()));
+ConstantFPRange ConstantFPRangeTest::Some =
+    ConstantFPRange::getNonNaN(APFloat(-3.0), APFloat(3.0));
+ConstantFPRange ConstantFPRangeTest::SomePos = ConstantFPRange::getNonNaN(
+    APFloat::getZero(APFloat::IEEEdouble(), /*Negative=*/false), APFloat(3.0));
+ConstantFPRange ConstantFPRangeTest::SomeNeg = ConstantFPRange::getNonNaN(
+    APFloat(-3.0), APFloat::getZero(APFloat::IEEEdouble(), /*Negative=*/true));
+
+static void strictNext(APFloat &V) {
+  // Note: nextUp(+/-0) is smallest.
+  if (V.isNegZero())
+    V = APFloat::getZero(V.getSemantics(), /*Negative=*/false);
+  else
+    V.next(/*nextDown=*/false);
+}
+
+template <typename Fn>
+static void EnumerateConstantFPRangesImpl(Fn TestFn, bool Exhaustive,
+                                          bool MayBeQNaN, bool MayBeSNaN) {
+  const fltSemantics &Sem = APFloat::Float8E4M3();
+  APFloat PosInf = APFloat::getInf(Sem, /*Negative=*/false);
+  APFloat NegInf = APFloat::getInf(Sem, /*Negative=*/true);
+  TestFn(ConstantFPRange(PosInf, NegInf, MayBeQNaN, MayBeSNaN));
+
+  if (!Exhaustive) {
+    SmallVector<APFloat> Values;
+    Values.push_back(APFloat::getInf(Sem, /*Negative=*/true));
+    Values.push_back(APFloat::getLargest(Sem, /*Negative=*/true));
+    unsigned BitWidth = APFloat::semanticsSizeInBits(Sem);
+    unsigned Exponents = APFloat::semanticsMaxExponent(Sem) -
+                         APFloat::semanticsMinExponent(Sem) + 3;
+    unsigned MantissaBits = APFloat::semanticsPrecision(Sem) - 1;
+    // Add -2^(max exponent), -2^(max exponent-1), ..., -2^(min exponent)
+    for (unsigned M = Exponents - 2; M != 0; --M)
+      Values.push_back(
+          APFloat(Sem, APInt(BitWidth, (M + Exponents) << MantissaBits)));
+    Values.push_back(APFloat::getSmallest(Sem, /*Negative=*/true));
+    Values.push_back(APFloat::getZero(Sem, /*Negative=*/true));
+    size_t E = Values.size();
+    for (size_t I = 1; I <= E; ++I)
+      Values.push_back(-Values[E - I]);
+    for (size_t I = 0; I != Values.size(); ++I)
+      for (size_t J = I; J != Values.size(); ++J)
+        TestFn(ConstantFPRange(Values[I], Values[J], MayBeQNaN, MayBeSNaN));
+    return;
+  }
+
+  auto Next = [&](APFloat &V) {
+    if (V.isPosInfinity())
+      return false;
+    strictNext(V);
+    return true;
+  };
+
+  APFloat Lower = NegInf;
+  do {
+    APFloat Upper = Lower;
+    do {
+      TestFn(ConstantFPRange(Lower, Upper, MayBeQNaN, MayBeSNaN));
+    } while (Next(Upper));
+  } while (Next(Lower));
+}
+
+template <typename Fn>
+static void EnumerateConstantFPRanges(Fn TestFn, bool Exhaustive) {
+  EnumerateConstantFPRangesImpl(TestFn, Exhaustive, /*MayBeQNaN=*/false,
+                                /*MayBeSNaN=*/false);
+  EnumerateConstantFPRangesImpl(TestFn, Exhaustive, /*MayBeQNaN=*/false,
+                                /*MayBeSNaN=*/true);
+  EnumerateConstantFPRangesImpl(TestFn, Exhaustive, /*MayBeQNaN=*/true,
+                                /*MayBeSNaN=*/false);
+  EnumerateConstantFPRangesImpl(TestFn, Exhaustive, /*MayBeQNaN=*/true,
+                                /*MayBeSNaN=*/true);
+}
+
+template <typename Fn>
+static void EnumerateTwoInterestingConstantFPRanges(Fn TestFn,
+                                                    bool Exhaustive) {
+  EnumerateConstantFPRanges(
+      [&](const ConstantFPRange &CR1) {
+        EnumerateConstantFPRanges(
+            [&](const ConstantFPRange &CR2) { TestFn(CR1, CR2); }, Exhaustive);
+      },
+      Exhaustive);
+}
+
+template <typename Fn>
+static void EnumerateValuesInConstantFPRange(const ConstantFPRange &CR,
+                                             Fn TestFn) {
+  const fltSemantics &Sem = CR.getSemantics();
+  unsigned Bits = APFloat::semanticsSizeInBits(Sem);
+  assert(Bits < 32 && "Too many bits");
+  for (unsigned I = 0, E = (1U << Bits) - 1; I != E; ++I) {
+    APFloat V(Sem, APInt(Bits, I));
+    if (CR.contains(V))
+      TestFn(V);
+  }
+}
+
+TEST_F(ConstantFPRangeTest, Basics) {
+  EXPECT_TRUE(Full.isFullSet());
+  EXPECT_FALSE(Full.isEmptySet());
+  EXPECT_TRUE(Full.contains(APFloat::getNaN(Sem)));
+  EXPECT_TRUE(Full.contains(APFloat::getInf(Sem, /*Negative=*/false)));
+  EXPECT_TRUE(Full.contains(APFloat::getInf(Sem, /*Negative=*/true)));
+  EXPECT_TRUE(Full.contains(APFloat::getZero(Sem, /*Negative=*/false)));
+  EXPECT_TRUE(Full.contains(APFloat::getZero(Sem, /*Negative=*/true)));
+  EXPECT_TRUE(Full.contains(APFloat::getSmallest(Sem)));
+  EXPECT_TRUE(Full.contains(APFloat(2.0)));
+  EXPECT_TRUE(Full.contains(Full));
+  EXPECT_TRUE(Full.contains(Empty));
+  EXPECT_TRUE(Full.contains(Finite));
+  EXPECT_TRUE(Full.contains(Zero));
+  EXPECT_TRUE(Full.contains(Some));
+
+  EXPECT_FALSE(Empty.isFullSet());
+  EXPECT_TRUE(Empty.isEmptySet());
+  EXPECT_FALSE(Empty.contains(APFloat::getNaN(Sem)));
+  EXPECT_FALSE(Empty.contains(APFloat::getInf(Sem, /*Negative=*/false)));
+  EXPECT_FALSE(Empty.contains(APFloat::getZero(Sem, /*Negative=*/true)));
+  EXPECT_FALSE(Empty.contains(APFloat(2.0)));
+  EXPECT_TRUE(Empty.contains(Empty));
+
+  EXPECT_FALSE(Finite.isFullSet());
+  EXPECT_FALSE(Finite.isEmptySet());
+  EXPECT_FALSE(Finite.contains(APFloat::getNaN(Sem)));
+  EXPECT_FALSE(Finite.contains(APFloat::getInf(Sem, /*Negative=*/false)));
+  EXPECT_FALSE(Finite.contains(APFloat::getInf(Sem, /*Negative=*/true)));
+  EXPECT_TRUE(Finite.contains(APFloat::getLargest(Sem, /*Negative=*/false)));
+  EXPECT_TRUE(Finite.contains(APFloat::getLargest(Sem, /*Negative=*/true)));
+  EXPECT_TRUE(Finite.contains(Finite));
+  EXPECT_TRUE(Finite.contains(Some));
+  EXPECT_TRUE(Finite.contains(Denormal));
+  EXPECT_TRUE(Finite.contains(Zero));
+  EXPECT_FALSE(Finite.contains(PosInf));
+  EXPECT_FALSE(Finite.contains(NaN));
+
+  EXPECT_TRUE(One.contains(APFloat(1.0)));
+  EXPECT_FALSE(One.contains(APFloat(1.1)));
+
+  EXPECT_TRUE(PosZero.contains(APFloat::getZero(Sem, /*Negative=*/false)));
+  EXPECT_FALSE(PosZero.contains(APFloat::getZero(Sem, /*Negative=*/true)));
+  EXPECT_TRUE(NegZero.contains(APFloat::getZero(Sem, /*Negative=*/true)));
+  EXPECT_FALSE(NegZero.contains(APFloat::getZero(Sem, /*Negative=*/false)));
+  EXPECT_TRUE(Zero.contains(PosZero));
+  EXPECT_TRUE(Zero.contains(NegZero));
+  EXPECT_TRUE(Denormal.contains(APFloat::getSmallest(Sem)));
+  EXPECT_FALSE(Denormal.contains(APFloat::getSmallestNormalized(Sem)));
+  EXPECT_TRUE(PosInf.contains(APFloat::getInf(Sem, /*Negative=*/false)));
+  EXPECT_TRUE(NegInf.contains(APFloat::getInf(Sem, /*Negative=*/true)));
+  EXPECT_TRUE(NaN.contains(APFloat::getQNaN(Sem)));
+  EXPECT_TRUE(NaN.contains(APFloat::getSNaN(Sem)));
+  EXPECT_TRUE(NaN.contains(SNaN));
+  EXPECT_TRUE(NaN.contains(QNaN));
+
+  EXPECT_TRUE(Some.contains(APFloat(3.0)));
+  EXPECT_TRUE(Some.contains(APFloat(-3.0)));
+  EXPECT_FALSE(Some.contains(APFloat(4.0)));
+  APFloat Next1(3.0);
+  Next1.next(/*nextDown=*/true);
+  EXPECT_TRUE(Some.contains(Next1));
+  APFloat Next2(3.0);
+  Next2.next(/*nextDown=*/false);
+  EXPECT_FALSE(Some.contains(Next2));
+  EXPECT_TRUE(Some.contains(Zero));
+  EXPECT_TRUE(Some.contains(Some));
+  EXPECT_TRUE(Some.contains(One));
+  EXPECT_FALSE(Some.contains(NaN));
+  EXPECT_FALSE(Some.contains(PosInf));
+  EXPECT_TRUE(SomePos.contains(APFloat(3.0)));
+  EXPECT_FALSE(SomeNeg.contains(APFloat(3.0)));
+  EXPECT_TRUE(SomeNeg.contains(APFloat(-3.0)));
+  EXPECT_FALSE(SomePos.contains(APFloat(-3.0)));
+  EXPECT_TRUE(Some.contains(SomePos));
+  EXPECT_TRUE(Some.contains(SomeNeg));
+}
+
+TEST_F(ConstantFPRangeTest, Equality) {
+  EXPECT_EQ(Full, Full);
+  EXPECT_EQ(Empty, Empty);
+  EXPECT_EQ(One, One);
+  EXPECT_EQ(Some, Some);
+  EXPECT_NE(Full, Empty);
+  EXPECT_NE(Zero, PosZero);
+  EXPECT_NE(One, NaN);
+  EXPECT_NE(Some, One);
+  EXPECT_NE(SNaN, QNaN);
+}
+
+TEST_F(ConstantFPRangeTest, SingleElement) {
+  EXPECT_EQ(Full.getSingleElement(), static_cast<const APFloat *>(nullptr));
+  EXPECT_EQ(Empty.getSingleElement(), static_cast<const APFloat *>(nullptr));
+  EXPECT_EQ(Finite.getSingleElement(), static_cast<const APFloat *>(nullptr));
+  EXPECT_EQ(Zero.getSingleElement(), static_cast<const APFloat *>(nullptr));
+  EXPECT_EQ(NaN.getSingleElement(), static_cast<const APFloat *>(nullptr));
+  EXPECT_EQ(SNaN.getSingleElement(), static_cast<const APFloat *>(nullptr));
+  EXPECT_EQ(QNaN.getSingleElement(), static_cast<const APFloat *>(nullptr));
+
+  EXPECT_EQ(*One.getSingleElement(), APFloat(1.0));
+  EXPECT_EQ(*PosZero.getSingleElement(), APFloat::getZero(Sem));
+  EXPECT_EQ(*PosInf.getSingleElement(), APFloat::getInf(Sem));
+
+  EXPECT_FALSE(Full.isSingleElement());
+  EXPECT_FALSE(Empty.isSingleElement());
+  EXPECT_TRUE(One.isSingleElement());
+  EXPECT_FALSE(Some.isSingleElement());
+  EXPECT_FALSE(Zero.isSingleElement());
+}
+
+TEST_F(ConstantFPRangeTest, ExhaustivelyEnumerate) {
+  constexpr unsigned NNaNValues = (1 << 8) - 2 * ((1 << 3) - 1);
+  constexpr unsigned Expected = 4 * ((NNaNValues + 1) * NNaNValues / 2 + 1);
+  unsigned Count = 0;
+  EnumerateConstantFPRanges([&](const ConstantFPRange &) { ++Count; },
+                            /*Exhaustive=*/true);
+  EXPECT_EQ(Expected, Count);
+}
+
+TEST_F(ConstantFPRangeTest, Enumerate) {
+  constexpr unsigned NNaNValues = 2 * ((1 << 4) - 2 + 4);
+  constexpr unsigned Expected = 4 * ((NNaNValues + 1) * NNaNValues / 2 + 1);
+  unsigned Count = 0;
+  EnumerateConstantFPRanges([&](const ConstantFPRange &) { ++Count; },
+                            /*Exhaustive=*/false);
+  EXPECT_EQ(Expected, Count);
+}
+
+TEST_F(ConstantFPRangeTest, IntersectWith) {
+  EXPECT_EQ(Empty.intersectWith(Full), Empty);
+  EXPECT_EQ(Empty.intersectWith(Empty), Empty);
+  EXPECT_EQ(Empty.intersectWith(One), Empty);
+  EXPECT_EQ(Empty.intersectWith(Some), Empty);
+  EXPECT_EQ(Full.intersectWith(Full), Full);
+  EXPECT_EQ(Some.intersectWith(Some), Some);
+  EXPECT_EQ(Some.intersectWith(One), One);
+  EXPECT_EQ(Full.intersectWith(One), One);
+  EXPECT_EQ(Full.intersectWith(Some), Some);
+  EXPECT_EQ(Some.intersectWith(SomePos), SomePos);
+  EXPECT_EQ(Some.intersectWith(SomeNeg), SomeNeg);
+  EXPECT_EQ(NaN.intersectWith(Finite), Empty);
+  EXPECT_EQ(NaN.intersectWith(SNaN), SNaN);
+  EXPECT_EQ(NaN.intersectWith(QNaN), QNaN);
+  EXPECT_EQ(Finite.intersectWith(One), One);
+  EXPECT_EQ(Some.intersectWith(Zero), Zero);
+  EXPECT_EQ(ConstantFPRange::getNonNaN(APFloat(1.0), APFloat(4.0))
+                .intersectWith(
+                    ConstantFPRange::getNonNaN(APFloat(3.0), APFloat(6.0))),
+            ConstantFPRange::getNonNaN(APFloat(3.0), APFloat(4.0)));
+  EXPECT_EQ(ConstantFPRange::getNonNaN(APFloat(1.0), APFloat(2.0))
+                .intersectWith(
+                    ConstantFPRange::getNonNaN(APFloat(5.0), APFloat(6.0))),
+            Empty);
+}
+
+TEST_F(ConstantFPRangeTest, UnionWith) {
+  EXPECT_EQ(Empty.unionWith(Full), Full);
+  EXPECT_EQ(Empty.unionWith(Empty), Empty);
+  EXPECT_EQ(Empty.unionWith(One), One);
+  EXPECT_EQ(Empty.unionWith(Some), Some);
+  EXPECT_EQ(Full.unionWith(Full), Full);
+  EXPECT_EQ(Some.unionWith(Some), Some);
+  EXPECT_EQ(Some.unionWith(One), Some);
+  EXPECT_EQ(Full.unionWith(Some), Full);
+  EXPECT_EQ(Some.unionWith(SomePos), Some);
+  EXPECT_EQ(Some.unionWith(SomeNeg), Some);
+  EXPECT_EQ(Finite.unionWith(One), Finite);
+  EXPECT_EQ(Some.unionWith(Zero), Some);
+  EXPECT_EQ(Finite.unionWith(PosInf).unionWith(NegInf).unionWith(NaN), Full);
+  EXPECT_EQ(PosZero.unionWith(NegZero), Zero);
+  EXPECT_EQ(NaN.unionWith(SNaN), NaN);
+  EXPECT_EQ(NaN.unionWith(QNaN), NaN);
+  EXPECT_EQ(SNaN.unionWith(QNaN), NaN);
+  EXPECT_EQ(
+      ConstantFPRange::getNonNaN(APFloat(1.0), APFloat(4.0))
+          .unionWith(ConstantFPRange::getNonNaN(APFloat(3.0), APFloat(6.0))),
+      ConstantFPRange::getNonNaN(APFloat(1.0), APFloat(6.0)));
+  EXPECT_EQ(
+      ConstantFPRange::getNonNaN(APFloat(1.0), APFloat(2.0))
+          .unionWith(ConstantFPRange::getNonNaN(APFloat(5.0), APFloat(6.0))),
+      ConstantFPRange::getNonNaN(APFloat(1.0), APFloat(6.0)));
+}
+
+TEST_F(ConstantFPRangeTest, FPClassify) {
+  EXPECT_EQ(Empty.classify(), fcNone);
+  EXPECT_EQ(Full.classify(), fcAllFlags);
+  EXPECT_EQ(Finite.classify(), fcFinite);
+  EXPECT_EQ(Zero.classify(), fcZero);
+  EXPECT_EQ(NaN.classify(), fcNan);
+  EXPECT_EQ(SNaN.classify(), fcSNan);
+  EXPECT_EQ(QNaN.classify(), fcQNan);
+  EXPECT_EQ(One.classify(), fcPosNormal);
+  EXPECT_EQ(Some.classify(), fcFinite);
+  EXPECT_EQ(SomePos.classify(), fcPosFinite);
+  EXPECT_EQ(SomeNeg.classify(), fcNegFinite);
+  EXPECT_EQ(PosInf.classify(), fcPosInf);
+  EXPECT_EQ(NegInf.classify(), fcNegInf);
+  EXPECT_TRUE(SomePos.toKnownFPClass().cannotBeOrderedLessThanZero());
+  EXPECT_EQ(Finite.getSignBit(), std::nullopt);
+  EXPECT_EQ(PosZero.getSignBit(), false);
+  EXPECT_EQ(NegZero.getSignBit(), true);
+  EXPECT_EQ(SomePos.getSignBit(), false);
+  EXPECT_EQ(SomeNeg.getSignBit(), true);
+  EXPECT_EQ(SomePos.toKnownFPClass().SignBit, false);
+  EXPECT_EQ(SomeNeg.toKnownFPClass().SignBit, true);
+
+  EnumerateConstantFPRanges(
+      [](const ConstantFPRange &CR) {
+        unsigned Mask = fcNone;
+        bool HasPos = false, HasNeg = false;
+        EnumerateValuesInConstantFPRange(CR, [&](const APFloat &V) {
+          Mask |= V.classify();
+          if (V.isNegative())
+            HasNeg = true;
+          else
+            HasPos = true;
+        });
+
+        std::optional<bool> SignBit = std::nullopt;
+        if (HasPos != HasNeg)
+          SignBit = HasNeg;
+
+        EXPECT_EQ(SignBit, CR.getSignBit()) << CR;
+        EXPECT_EQ(Mask, CR.classify()) << CR;
+      },
+      /*Exhaustive=*/true);
+}
+
+TEST_F(ConstantFPRangeTest, Print) {
+  auto ToString = [](const ConstantFPRange &CR) {
+    std::string Str;
+    raw_string_ostream OS(Str);
+    CR.print(OS);
+    return Str;
+  };
+
+  EXPECT_EQ(ToString(Full), "full-set");
+  EXPECT_EQ(ToString(Empty), "empty-set");
+  EXPECT_EQ(ToString(NaN), "NaN");
+  EXPECT_EQ(ToString(SNaN), "SNaN");
+  EXPECT_EQ(ToString(QNaN), "QNaN");
+  EXPECT_EQ(ToString(One), "[1, 1]");
+  EXPECT_EQ(ToString(Some.unionWith(SNaN)), "[-3, 3] with SNaN");
+}
+
+#ifdef GTEST_HAS_DEATH_TEST
+#ifndef NDEBUG
+TEST_F(ConstantFPRangeTest, NonCanonicalEmptySet) {
+  EXPECT_DEATH(ConstantFPRange::getNonNaN(APFloat(1.0), APFloat(0.0)),
+               "Non-canonical form");
+}
+TEST_F(ConstantFPRangeTest, MismatchedSemantics) {
+  EXPECT_DEATH(ConstantFPRange::getNonNaN(APFloat(0.0), APFloat(1.0f)),
+               "Should only use the same semantics");
+  EXPECT_DEATH(One.contains(APFloat(1.0f)),
+               "Should only use the same semantics");
+  ConstantFPRange OneF32 = ConstantFPRange(APFloat(1.0f));
+  EXPECT_DEATH(One.contains(OneF32), "Should only use the same semantics");
+  EXPECT_DEATH(One.intersectWith(OneF32), "Should only use the same semantics");
+  EXPECT_DEATH(One.unionWith(OneF32), "Should only use the same semantics");
+}
+#endif
+#endif
+
+} // anonymous namespace

From d8f555d62546fc1fbee3d2a48b37779c629bf87b Mon Sep 17 00:00:00 2001
From: Yingwei Zheng
Date: Wed, 25 Sep 2024 13:59:10 +0800
Subject: [PATCH 096/212] [UBSan] Diagnose assumption violation (#104741)

This patch extends [D34590](https://reviews.llvm.org/D34590) to check
assumption violations.
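As a minimal user-level sketch of the new behavior (an illustrative
reproducer, not part of the patch's test suite; the invocation is assumed,
and the diagnostic text comes from the handler changed below):

```
// Build roughly as the updated test does, e.g.:
//   clang++ -fsanitize=builtin repro.cpp -o repro && ./repro
// Expected diagnostic (per the new BCK_AssumePassedFalse handler):
//   runtime error: assumption is violated during execution
int main() {
  int x = 0;
  __builtin_assume(x == 1); // false at run time: previously silent UB
  return 0;
}
```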
--------- Co-authored-by: Vitaly Buka --- clang/lib/CodeGen/CGBuiltin.cpp | 21 ++++++++++++++++--- clang/lib/CodeGen/CGStmt.cpp | 2 +- clang/lib/CodeGen/CodeGenFunction.h | 5 +++++ clang/test/CodeGen/ubsan-builtin-checks.c | 17 +++++++++++++++ compiler-rt/lib/ubsan/ubsan_handlers.cpp | 9 +++++--- compiler-rt/lib/ubsan/ubsan_handlers.h | 1 + .../test/ubsan/TestCases/Misc/builtins.cpp | 16 ++++++++++++-- 7 files changed, 62 insertions(+), 9 deletions(-) diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 942468204f054..7bbf22569e73c 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -1997,8 +1997,8 @@ struct CallObjCArcUse final : EHScopeStack::Cleanup { Value *CodeGenFunction::EmitCheckedArgForBuiltin(const Expr *E, BuiltinCheckKind Kind) { - assert((Kind == BCK_CLZPassedZero || Kind == BCK_CTZPassedZero) - && "Unsupported builtin check kind"); + assert((Kind == BCK_CLZPassedZero || Kind == BCK_CTZPassedZero) && + "Unsupported builtin check kind"); Value *ArgValue = EmitScalarExpr(E); if (!SanOpts.has(SanitizerKind::Builtin)) @@ -2015,6 +2015,21 @@ Value *CodeGenFunction::EmitCheckedArgForBuiltin(const Expr *E, return ArgValue; } +Value *CodeGenFunction::EmitCheckedArgForAssume(const Expr *E) { + Value *ArgValue = EvaluateExprAsBool(E); + if (!SanOpts.has(SanitizerKind::Builtin)) + return ArgValue; + + SanitizerScope SanScope(this); + EmitCheck( + std::make_pair(ArgValue, SanitizerKind::Builtin), + SanitizerHandler::InvalidBuiltin, + {EmitCheckSourceLocation(E->getExprLoc()), + llvm::ConstantInt::get(Builder.getInt8Ty(), BCK_AssumePassedFalse)}, + std::nullopt); + return ArgValue; +} + static Value *EmitAbs(CodeGenFunction &CGF, Value *ArgValue, bool HasNSW) { return CGF.Builder.CreateBinaryIntrinsic( Intrinsic::abs, ArgValue, @@ -3428,7 +3443,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, if (E->getArg(0)->HasSideEffects(getContext())) return RValue::get(nullptr); - Value *ArgValue = EmitScalarExpr(E->getArg(0)); + Value *ArgValue = EmitCheckedArgForAssume(E->getArg(0)); Function *FnAssume = CGM.getIntrinsic(Intrinsic::assume); Builder.CreateCall(FnAssume, ArgValue); return RValue::get(nullptr); diff --git a/clang/lib/CodeGen/CGStmt.cpp b/clang/lib/CodeGen/CGStmt.cpp index 623857b43a557..9bf15fca0de48 100644 --- a/clang/lib/CodeGen/CGStmt.cpp +++ b/clang/lib/CodeGen/CGStmt.cpp @@ -754,7 +754,7 @@ void CodeGenFunction::EmitAttributedStmt(const AttributedStmt &S) { const Expr *Assumption = cast(A)->getAssumption(); if (getLangOpts().CXXAssumptions && Builder.GetInsertBlock() && !Assumption->HasSideEffects(getContext())) { - llvm::Value *AssumptionVal = EvaluateExprAsBool(Assumption); + llvm::Value *AssumptionVal = EmitCheckedArgForAssume(Assumption); Builder.CreateAssumption(AssumptionVal); } } break; diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h index 2df17e83bae2e..d4e46f77868ec 100644 --- a/clang/lib/CodeGen/CodeGenFunction.h +++ b/clang/lib/CodeGen/CodeGenFunction.h @@ -5084,12 +5084,17 @@ class CodeGenFunction : public CodeGenTypeCache { enum BuiltinCheckKind { BCK_CTZPassedZero, BCK_CLZPassedZero, + BCK_AssumePassedFalse, }; /// Emits an argument for a call to a builtin. If the builtin sanitizer is /// enabled, a runtime check specified by \p Kind is also emitted. llvm::Value *EmitCheckedArgForBuiltin(const Expr *E, BuiltinCheckKind Kind); + /// Emits an argument for a call to a `__builtin_assume`. 
If the builtin + /// sanitizer is enabled, a runtime check is also emitted. + llvm::Value *EmitCheckedArgForAssume(const Expr *E); + /// Emit a description of a type in a format suitable for passing to /// a runtime sanitizer handler. llvm::Constant *EmitCheckTypeDescriptor(QualType T); diff --git a/clang/test/CodeGen/ubsan-builtin-checks.c b/clang/test/CodeGen/ubsan-builtin-checks.c index c7f6078f903ba..8535ec915ac34 100644 --- a/clang/test/CodeGen/ubsan-builtin-checks.c +++ b/clang/test/CodeGen/ubsan-builtin-checks.c @@ -51,3 +51,20 @@ void check_clz(int n) { // CHECK: call void @__ubsan_handle_invalid_builtin __builtin_clzg((unsigned int)n); } + +// CHECK: define{{.*}} void @check_assume +void check_assume(int n) { + // CHECK: [[TOBOOL:%.*]] = icmp ne i32 [[N:%.*]], 0 + // CHECK-NEXT: br i1 [[TOBOOL]] + // + // Handler block: + // CHECK: call void @__ubsan_handle_invalid_builtin + // CHECK-NEXT: unreachable + // + // Continuation block: + // CHECK: call void @llvm.assume(i1 [[TOBOOL]]) + __builtin_assume(n); + + // CHECK: call void @__ubsan_handle_invalid_builtin + __attribute__((assume(n))); +} diff --git a/compiler-rt/lib/ubsan/ubsan_handlers.cpp b/compiler-rt/lib/ubsan/ubsan_handlers.cpp index 9dbe8e6c0c174..a419cf0b2b555 100644 --- a/compiler-rt/lib/ubsan/ubsan_handlers.cpp +++ b/compiler-rt/lib/ubsan/ubsan_handlers.cpp @@ -633,9 +633,12 @@ static void handleInvalidBuiltin(InvalidBuiltinData *Data, ReportOptions Opts) { ScopedReport R(Opts, Loc, ET); - Diag(Loc, DL_Error, ET, - "passing zero to __builtin_%0(), which is not a valid argument") - << ((Data->Kind == BCK_CTZPassedZero) ? "ctz" : "clz"); + if (Data->Kind == BCK_AssumePassedFalse) + Diag(Loc, DL_Error, ET, "assumption is violated during execution"); + else + Diag(Loc, DL_Error, ET, + "passing zero to __builtin_%0(), which is not a valid argument") + << ((Data->Kind == BCK_CTZPassedZero) ? 
"ctz" : "clz"); } void __ubsan::__ubsan_handle_invalid_builtin(InvalidBuiltinData *Data) { diff --git a/compiler-rt/lib/ubsan/ubsan_handlers.h b/compiler-rt/lib/ubsan/ubsan_handlers.h index bae661a56833d..4ffa1439a1323 100644 --- a/compiler-rt/lib/ubsan/ubsan_handlers.h +++ b/compiler-rt/lib/ubsan/ubsan_handlers.h @@ -159,6 +159,7 @@ RECOVERABLE(implicit_conversion, ImplicitConversionData *Data, ValueHandle Src, enum BuiltinCheckKind : unsigned char { BCK_CTZPassedZero, BCK_CLZPassedZero, + BCK_AssumePassedFalse, }; struct InvalidBuiltinData { diff --git a/compiler-rt/test/ubsan/TestCases/Misc/builtins.cpp b/compiler-rt/test/ubsan/TestCases/Misc/builtins.cpp index a635f7fcc686e..2702065bce067 100644 --- a/compiler-rt/test/ubsan/TestCases/Misc/builtins.cpp +++ b/compiler-rt/test/ubsan/TestCases/Misc/builtins.cpp @@ -1,8 +1,8 @@ // REQUIRES: target={{x86_64.*}} // -// RUN: %clangxx -fsanitize=builtin -w %s -O3 -o %t +// RUN: %clangxx -fsanitize=builtin -fno-inline -w %s -O3 -o %t // RUN: %run %t 2>&1 | FileCheck %s --check-prefix=RECOVER -// RUN: %clangxx -fsanitize=builtin -fno-sanitize-recover=builtin -w %s -O3 -o %t.abort +// RUN: %clangxx -fsanitize=builtin -fno-inline -fno-sanitize-recover=builtin -w %s -O3 -o %t.abort // RUN: not %run %t.abort 2>&1 | FileCheck %s --check-prefix=ABORT void check_ctz(int n) { @@ -28,8 +28,20 @@ void check_clz(int n) { __builtin_clzll(n); } +void check_assume(int n) { + // RECOVER: builtins.cpp:[[@LINE+1]]:20: runtime error: assumption is violated during execution + __builtin_assume(n); +} + +void check_assume_attr(int n) { + // RECOVER: builtins.cpp:[[@LINE+1]]:25: runtime error: assumption is violated during execution + __attribute__((assume(n))); +} + int main() { check_ctz(0); check_clz(0); + check_assume(0); + check_assume_attr(0); return 0; } From 4bd3a62cd600c607e7cb3d69dd75ac4069e3eef9 Mon Sep 17 00:00:00 2001 From: Timm Baeder Date: Wed, 25 Sep 2024 08:00:32 +0200 Subject: [PATCH 097/212] [clang][bytecode] Fix diagnosing std::construct_at with wrong type (#109828) We can't make the assumption that types are always fine in std functions. --- clang/lib/AST/ByteCode/Descriptor.cpp | 11 ++++++++--- clang/lib/AST/ByteCode/Interp.cpp | 4 ---- clang/test/AST/ByteCode/placement-new.cpp | 12 +++++++++++- 3 files changed, 19 insertions(+), 8 deletions(-) diff --git a/clang/lib/AST/ByteCode/Descriptor.cpp b/clang/lib/AST/ByteCode/Descriptor.cpp index 170203fe81877..05ece907af42f 100644 --- a/clang/lib/AST/ByteCode/Descriptor.cpp +++ b/clang/lib/AST/ByteCode/Descriptor.cpp @@ -389,12 +389,17 @@ Descriptor::Descriptor(const DeclTy &D) } QualType Descriptor::getType() const { - if (const auto *E = asExpr()) - return E->getType(); if (const auto *D = asValueDecl()) return D->getType(); - if (const auto *T = dyn_cast(asDecl())) + if (const auto *T = dyn_cast_if_present(asDecl())) return QualType(T->getTypeForDecl(), 0); + + // The Source sometimes has a different type than the once + // we really save. Try to consult the Record first. 
+ if (isRecord()) + return QualType(ElemRecord->getDecl()->getTypeForDecl(), 0); + if (const auto *E = asExpr()) + return E->getType(); llvm_unreachable("Invalid descriptor type"); } diff --git a/clang/lib/AST/ByteCode/Interp.cpp b/clang/lib/AST/ByteCode/Interp.cpp index 8b578ccbeb679..b9c85626ffa99 100644 --- a/clang/lib/AST/ByteCode/Interp.cpp +++ b/clang/lib/AST/ByteCode/Interp.cpp @@ -1296,10 +1296,6 @@ bool CheckNewTypeMismatch(InterpState &S, CodePtr OpPC, const Expr *E, if (!InvalidNewDeleteExpr(S, OpPC, E)) return false; - // Assume proper types in std functions. - if (S.Current->isStdFunction()) - return true; - const auto *NewExpr = cast(E); QualType StorageType = Ptr.getType(); diff --git a/clang/test/AST/ByteCode/placement-new.cpp b/clang/test/AST/ByteCode/placement-new.cpp index 7a562adae02a6..1ff6ff3ac1922 100644 --- a/clang/test/AST/ByteCode/placement-new.cpp +++ b/clang/test/AST/ByteCode/placement-new.cpp @@ -13,7 +13,8 @@ namespace std { }; template constexpr void construct_at(void *p, Args &&...args) { - new (p) T((Args&&)args...); // both-note {{in call to}} + new (p) T((Args&&)args...); // both-note {{in call to}} \ + // both-note {{placement new would change type of storage from 'int' to 'float'}} } } @@ -260,4 +261,13 @@ namespace ConstructAt { static_assert(ctorFail()); // both-error {{not an integral constant expression}} \ // both-note {{in call to 'ctorFail()'}} + + constexpr bool bad_construct_at_type() { + int a; + std::construct_at(&a, 1.0f); // both-note {{in call to}} + return true; + } + static_assert(bad_construct_at_type()); // both-error {{not an integral constant expression}} \ + // both-note {{in call}} + } From 416f101111e06339387ac456c288cb5e60f41bc1 Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Tue, 24 Sep 2024 23:01:45 -0700 Subject: [PATCH 098/212] [clang] Use std::optional::value_or (NFC) (#109894) --- clang/include/clang/AST/PropertiesBase.td | 2 +- clang/include/clang/Basic/FileManager.h | 2 +- clang/lib/APINotes/APINotesYAMLCompiler.cpp | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/clang/include/clang/AST/PropertiesBase.td b/clang/include/clang/AST/PropertiesBase.td index 9b934b20cf255..3057669e3758b 100644 --- a/clang/include/clang/AST/PropertiesBase.td +++ b/clang/include/clang/AST/PropertiesBase.td @@ -39,7 +39,7 @@ class EnumPropertyType : PropertyType {} /// Supports optional values by using the null representation. class RefPropertyType : PropertyType { let PackOptional = - "value ? *value : nullptr"; + "value.value_or(nullptr)"; let UnpackOptional = "value ? std::optional<" # CXXName # ">(value) : std::nullopt"; } diff --git a/clang/include/clang/Basic/FileManager.h b/clang/include/clang/Basic/FileManager.h index 527bbef24793e..74029a91d1a6d 100644 --- a/clang/include/clang/Basic/FileManager.h +++ b/clang/include/clang/Basic/FileManager.h @@ -293,7 +293,7 @@ class FileManager : public RefCountedBase { bool RequiresNullTerminator = true, std::optional MaybeLimit = std::nullopt) const { return getBufferForFileImpl(Filename, - /*FileSize=*/(MaybeLimit ? 
*MaybeLimit : -1), + /*FileSize=*/MaybeLimit.value_or(-1), isVolatile, RequiresNullTerminator); } diff --git a/clang/lib/APINotes/APINotesYAMLCompiler.cpp b/clang/lib/APINotes/APINotesYAMLCompiler.cpp index 16fd59244086f..f72a1d65b5456 100644 --- a/clang/lib/APINotes/APINotesYAMLCompiler.cpp +++ b/clang/lib/APINotes/APINotesYAMLCompiler.cpp @@ -757,8 +757,8 @@ class YAMLConverter { OutInfo.addTypeInfo(idx++, N); audited = Nullability.size() > 0 || ReturnNullability; if (audited) - OutInfo.addTypeInfo(0, ReturnNullability ? *ReturnNullability - : NullabilityKind::NonNull); + OutInfo.addTypeInfo(0, + ReturnNullability.value_or(NullabilityKind::NonNull)); if (!audited) return; OutInfo.NullabilityAudited = audited; From c2fd3b76f21632fa617a18c50dc9dff84b1a6538 Mon Sep 17 00:00:00 2001 From: Rainer Orth Date: Wed, 25 Sep 2024 08:03:12 +0200 Subject: [PATCH 099/212] [ASan][test] XFAIL stack overflow tests on Linux/sparc64 (#109773) When enabling ASan SPARC testing as per PR #107405, 3 stack overflow tests `FAIL` on Linux/sparc64: ``` AddressSanitizer-sparc-linux :: TestCases/Linux/stack-overflow-recovery-mode.cpp AddressSanitizer-sparc-linux :: TestCases/Linux/stack-overflow-sigbus.cpp AddressSanitizer-sparc-linux :: TestCases/Posix/stack-overflow.cpp AddressSanitizer-sparc-linux-dynamic :: TestCases/Linux/stack-overflow-recovery-mode.cpp AddressSanitizer-sparc-linux-dynamic :: TestCases/Linux/stack-overflow-sigbus.cpp AddressSanitizer-sparc-linux-dynamic :: TestCases/Posix/stack-overflow.cpp ``` However, as detailed in Issue #109771, even a Linux equivalent of the Solaris/sparcv9 fix (PR #109101) doesn't improve the situation. Therefore this patch `XFAIL`s the tests until the root cause can be figured out. Tested on `sparc64-unknown-linux-gnu`, `sparcv9-sun-solaris2.11`, and `x86_64-pc-linux-gnu`. 
--- .../test/asan/TestCases/Linux/stack-overflow-recovery-mode.cpp | 3 +++ .../test/asan/TestCases/Linux/stack-overflow-sigbus.cpp | 3 +++ compiler-rt/test/asan/TestCases/Posix/stack-overflow.cpp | 3 +++ 3 files changed, 9 insertions(+) diff --git a/compiler-rt/test/asan/TestCases/Linux/stack-overflow-recovery-mode.cpp b/compiler-rt/test/asan/TestCases/Linux/stack-overflow-recovery-mode.cpp index e99665953784a..461702a0ea7a9 100644 --- a/compiler-rt/test/asan/TestCases/Linux/stack-overflow-recovery-mode.cpp +++ b/compiler-rt/test/asan/TestCases/Linux/stack-overflow-recovery-mode.cpp @@ -3,6 +3,9 @@ // RUN: %clang_asan -O0 -fsanitize-recover=address %s -o %t // RUN: %env_asan_opts=halt_on_error=false not %run %t 2>&1 | FileCheck %s +// Issue #109771 +// XFAIL: target={{sparc.*-.*-linux.*}} + #include #include #include diff --git a/compiler-rt/test/asan/TestCases/Linux/stack-overflow-sigbus.cpp b/compiler-rt/test/asan/TestCases/Linux/stack-overflow-sigbus.cpp index 8c9599c9f6110..f6c95318238af 100644 --- a/compiler-rt/test/asan/TestCases/Linux/stack-overflow-sigbus.cpp +++ b/compiler-rt/test/asan/TestCases/Linux/stack-overflow-sigbus.cpp @@ -2,6 +2,9 @@ // RUN: %clangxx_asan -O0 %s -o %t && %env_asan_opts=use_sigaltstack=1 not %run %t 2>&1 | FileCheck %s +// Issue #109771 +// XFAIL: target={{sparc.*-.*-linux.*}} + #include #include #include diff --git a/compiler-rt/test/asan/TestCases/Posix/stack-overflow.cpp b/compiler-rt/test/asan/TestCases/Posix/stack-overflow.cpp index 06057250f8759..3d95a4ba273db 100644 --- a/compiler-rt/test/asan/TestCases/Posix/stack-overflow.cpp +++ b/compiler-rt/test/asan/TestCases/Posix/stack-overflow.cpp @@ -16,6 +16,9 @@ // RUN: not %run %t 2>&1 | FileCheck %s // REQUIRES: stable-runtime +// Issue #109771 +// XFAIL: target={{sparc.*-.*-linux.*}} + // UNSUPPORTED: ios #include From 1cb12fa9edd744fe72897660541f7841401823c6 Mon Sep 17 00:00:00 2001 From: David Green Date: Wed, 25 Sep 2024 07:04:09 +0100 Subject: [PATCH 100/212] [GlobalISel] Combine unmerge(unmerge()) if the result is legal. (#109606) This attempts to fold: ``` %1:_(<2 x s32>), %2:_(<2 x s32>) = G_UNMERGE_VALUES %0:_(<4 x s32>) %3:_(s32), %4:_(s32) = G_UNMERGE_VALUES %1 ``` Into a single UNMERGE: ``` %3:_(s32), %4:_(s32), %5:_(s32), %6:_(s32) = G_UNMERGE_VALUES %0 ``` This transform already exists, this patch alters it to occur when the result UNMERGE is considered legal. It does not try to transform where the result would be extracting a subelement from a vector at the moment, as the code is not setup to handle it. ``` %1:_(s32), %2:_(s32) = G_UNMERGE_VALUES %0:_(<2 x s32>) %3:_(s16), %4:_(s16) = G_UNMERGE_VALUES %1 ``` This helps us reduce the amount of legalization artefacts, especially from widened vectors padded with undef. 
--- .../GlobalISel/LegalizationArtifactCombiner.h | 4 + .../AArch64/GlobalISel/legalize-cmp.mir | 9 +- .../AArch64/GlobalISel/legalize-select.mir | 27 ++- llvm/test/CodeGen/AArch64/bswap.ll | 2 - .../AArch64/fixed-vector-deinterleave.ll | 5 - llvm/test/CodeGen/AArch64/fpext.ll | 35 ++-- llvm/test/CodeGen/AArch64/fptoi.ll | 132 +++++--------- .../test/CodeGen/AArch64/fptosi-sat-vector.ll | 47 +++-- .../test/CodeGen/AArch64/fptoui-sat-vector.ll | 47 +++-- llvm/test/CodeGen/AArch64/fptrunc.ll | 3 - llvm/test/CodeGen/AArch64/itofp.ll | 119 +++---------- llvm/test/CodeGen/AArch64/neon-perm.ll | 10 +- llvm/test/CodeGen/AArch64/sadd_sat_vec.ll | 59 +++--- llvm/test/CodeGen/AArch64/shift.ll | 33 +--- llvm/test/CodeGen/AArch64/shufflevector.ll | 73 +++++--- llvm/test/CodeGen/AArch64/ssub_sat_vec.ll | 59 +++--- llvm/test/CodeGen/AArch64/uadd_sat_vec.ll | 61 ++++--- llvm/test/CodeGen/AArch64/usub_sat_vec.ll | 57 +++--- .../artifact-combiner-unmerge-values.mir | 39 ++-- .../GlobalISel/legalize-store-global.mir | 168 +++++++++--------- .../AMDGPU/GlobalISel/legalize-trunc.mir | 12 +- 21 files changed, 439 insertions(+), 562 deletions(-) diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h b/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h index bc83f19dc581f..0dc827b9ad43e 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h @@ -1090,6 +1090,10 @@ class LegalizationArtifactCombiner { LegalizeActionStep ActionStep = LI.getAction( {TargetOpcode::G_UNMERGE_VALUES, {OpTy, SrcUnmergeSrcTy}}); switch (ActionStep.Action) { + case LegalizeActions::Legal: + if (!OpTy.isVector() || !LI.isLegal({TargetOpcode::G_UNMERGE_VALUES, + {DestTy, SrcUnmergeSrcTy}})) + return false; case LegalizeActions::Lower: case LegalizeActions::Unsupported: break; diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-cmp.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-cmp.mir index 89750c90fc1cb..bd80a892e239e 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-cmp.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-cmp.mir @@ -412,12 +412,11 @@ body: | ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s8>) = G_BUILD_VECTOR %const(s8), %const(s8), %const(s8), %const(s8), %const(s8), %const(s8), %const(s8), %const(s8) ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<8 x s8>) = G_BUILD_VECTOR %const(s8), %const(s8), %const(s8), %const(s8), %const(s8), %const(s8), %const(s8), %const(s8) ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(<8 x s8>) = G_ICMP intpred(eq), [[BUILD_VECTOR]](<8 x s8>), [[BUILD_VECTOR1]] - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<4 x s8>), [[UV1:%[0-9]+]]:_(<4 x s8>) = G_UNMERGE_VALUES [[ICMP]](<8 x s8>) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8), [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[UV]](<4 x s8>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8), [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8), [[UV6:%[0-9]+]]:_(s8), [[UV7:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[ICMP]](<8 x s8>) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UV2]](s8) - ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV3]](s8) - ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[UV4]](s8) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UV]](s8) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV1]](s8) + ; CHECK-NEXT: 
[[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[UV2]](s8) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32), [[DEF]](s32) ; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR2]](<4 x s32>), [[C]](s64) diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-select.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-select.mir index 92f8e524dbb31..52a28ad37e362 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-select.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-select.mir @@ -307,29 +307,24 @@ body: | ; CHECK-NEXT: [[TRUNC8:%[0-9]+]]:_(s8) = G_TRUNC [[UV7]](s16) ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<8 x s8>) = G_BUILD_VECTOR [[TRUNC5]](s8), [[TRUNC6]](s8), [[TRUNC7]](s8), [[TRUNC8]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8) ; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<8 x s8>) = G_SHUFFLE_VECTOR [[BUILD_VECTOR]](<8 x s8>), [[BUILD_VECTOR1]], shufflemask(0, 0, 0, 0, undef, undef, undef, undef) - ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(<4 x s8>), [[UV9:%[0-9]+]]:_(<4 x s8>) = G_UNMERGE_VALUES [[SHUF]](<8 x s8>) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s8) = G_CONSTANT i8 1 - ; CHECK-NEXT: [[UV10:%[0-9]+]]:_(s8), [[UV11:%[0-9]+]]:_(s8), [[UV12:%[0-9]+]]:_(s8), [[UV13:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[UV8]](<4 x s8>) - ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<8 x s8>) = G_BUILD_VECTOR [[UV10]](s8), [[UV11]](s8), [[UV12]](s8), [[UV13]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8) - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(<8 x s16>) = G_ANYEXT [[BUILD_VECTOR2]](<8 x s8>) - ; CHECK-NEXT: [[UV14:%[0-9]+]]:_(<4 x s16>), [[UV15:%[0-9]+]]:_(<4 x s16>) = G_UNMERGE_VALUES [[ANYEXT]](<8 x s16>) - ; CHECK-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<8 x s8>) = G_BUILD_VECTOR [[C2]](s8), [[C2]](s8), [[C2]](s8), [[C2]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8) - ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(<8 x s16>) = G_ANYEXT [[BUILD_VECTOR3]](<8 x s8>) - ; CHECK-NEXT: [[UV16:%[0-9]+]]:_(<4 x s16>), [[UV17:%[0-9]+]]:_(<4 x s16>) = G_UNMERGE_VALUES [[ANYEXT1]](<8 x s16>) - ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(<4 x s16>) = G_XOR [[UV14]], [[UV16]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(<8 x s16>) = G_ANYEXT [[SHUF]](<8 x s8>) + ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(<4 x s16>), [[UV9:%[0-9]+]]:_(<4 x s16>) = G_UNMERGE_VALUES [[ANYEXT]](<8 x s16>) + ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<8 x s8>) = G_BUILD_VECTOR [[C2]](s8), [[C2]](s8), [[C2]](s8), [[C2]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(<8 x s16>) = G_ANYEXT [[BUILD_VECTOR2]](<8 x s8>) + ; CHECK-NEXT: [[UV10:%[0-9]+]]:_(<4 x s16>), [[UV11:%[0-9]+]]:_(<4 x s16>) = G_UNMERGE_VALUES [[ANYEXT1]](<8 x s16>) + ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(<4 x s16>) = G_XOR [[UV8]], [[UV10]] ; CHECK-NEXT: [[TRUNC9:%[0-9]+]]:_(<4 x s16>) = G_TRUNC [[ICMP]](<4 x s32>) - ; CHECK-NEXT: [[UV18:%[0-9]+]]:_(s8), [[UV19:%[0-9]+]]:_(s8), [[UV20:%[0-9]+]]:_(s8), [[UV21:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[UV8]](<4 x s8>) - ; CHECK-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<8 x s8>) = G_BUILD_VECTOR [[UV18]](s8), [[UV19]](s8), [[UV20]](s8), [[UV21]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8) - ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(<8 x s16>) = G_ANYEXT [[BUILD_VECTOR4]](<8 x s8>) - ; CHECK-NEXT: [[UV22:%[0-9]+]]:_(<4 x s16>), [[UV23:%[0-9]+]]:_(<4 x s16>) = G_UNMERGE_VALUES [[ANYEXT2]](<8 x s16>) - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(<4 x s16>) = G_AND 
[[TRUNC9]], [[UV22]] + ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(<8 x s16>) = G_ANYEXT [[SHUF]](<8 x s8>) + ; CHECK-NEXT: [[UV12:%[0-9]+]]:_(<4 x s16>), [[UV13:%[0-9]+]]:_(<4 x s16>) = G_UNMERGE_VALUES [[ANYEXT2]](<8 x s16>) + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(<4 x s16>) = G_AND [[TRUNC9]], [[UV12]] ; CHECK-NEXT: [[TRUNC10:%[0-9]+]]:_(<4 x s16>) = G_TRUNC [[ICMP1]](<4 x s32>) ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(<4 x s16>) = G_AND [[TRUNC10]], [[XOR]] ; CHECK-NEXT: [[OR:%[0-9]+]]:_(<4 x s16>) = G_OR [[AND]], [[AND1]] ; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(<4 x s32>) = G_ANYEXT [[OR]](<4 x s16>) ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[C3]](s32), [[C3]](s32), [[C3]](s32), [[C3]](s32) - ; CHECK-NEXT: %zext_select:_(<4 x s32>) = G_AND [[ANYEXT3]], [[BUILD_VECTOR5]] + ; CHECK-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[C3]](s32), [[C3]](s32), [[C3]](s32), [[C3]](s32) + ; CHECK-NEXT: %zext_select:_(<4 x s32>) = G_AND [[ANYEXT3]], [[BUILD_VECTOR3]] ; CHECK-NEXT: $q0 = COPY %zext_select(<4 x s32>) ; CHECK-NEXT: RET_ReallyLR implicit $q0 %w0:_(s32) = COPY $w0 diff --git a/llvm/test/CodeGen/AArch64/bswap.ll b/llvm/test/CodeGen/AArch64/bswap.ll index b14f1a43b7dcf..74e4a167ae14c 100644 --- a/llvm/test/CodeGen/AArch64/bswap.ll +++ b/llvm/test/CodeGen/AArch64/bswap.ll @@ -179,8 +179,6 @@ define <2 x i16> @bswap_v2i16(<2 x i16> %a){ ; CHECK-GI: // %bb.0: // %entry ; CHECK-GI-NEXT: uzp1 v0.4h, v0.4h, v0.4h ; CHECK-GI-NEXT: rev16 v0.8b, v0.8b -; CHECK-GI-NEXT: mov h1, v0.h[1] -; CHECK-GI-NEXT: mov v0.h[1], v1.h[0] ; CHECK-GI-NEXT: ushll v0.4s, v0.4h, #0 ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-GI-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/fixed-vector-deinterleave.ll b/llvm/test/CodeGen/AArch64/fixed-vector-deinterleave.ll index 5bd680ed48938..bbfec8c7c3361 100644 --- a/llvm/test/CodeGen/AArch64/fixed-vector-deinterleave.ll +++ b/llvm/test/CodeGen/AArch64/fixed-vector-deinterleave.ll @@ -18,11 +18,6 @@ define {<2 x half>, <2 x half>} @vector_deinterleave_v2f16_v4f16(<4 x half> %vec ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: uzp1 v2.4h, v0.4h, v0.4h ; CHECK-GI-NEXT: uzp2 v1.4h, v0.4h, v0.4h -; CHECK-GI-NEXT: mov h0, v2.h[1] -; CHECK-GI-NEXT: mov h3, v1.h[1] -; CHECK-GI-NEXT: mov v2.h[1], v0.h[0] -; CHECK-GI-NEXT: mov v1.h[1], v3.h[0] -; CHECK-GI-NEXT: // kill: def $d1 killed $d1 killed $q1 ; CHECK-GI-NEXT: fmov d0, d2 ; CHECK-GI-NEXT: ret %retval = call {<2 x half>, <2 x half>} @llvm.vector.deinterleave2.v4f16(<4 x half> %vec) diff --git a/llvm/test/CodeGen/AArch64/fpext.ll b/llvm/test/CodeGen/AArch64/fpext.ll index d942839c577d2..df90f9d5f0910 100644 --- a/llvm/test/CodeGen/AArch64/fpext.ll +++ b/llvm/test/CodeGen/AArch64/fpext.ll @@ -376,15 +376,15 @@ define <4 x double> @fpext_v4f16_v4f64(<4 x half> %a) { ; CHECK-GI-LABEL: fpext_v4f16_v4f64: ; CHECK-GI: // %bb.0: // %entry ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-NEXT: mov s1, v0.s[1] -; CHECK-GI-NEXT: mov h2, v0.h[1] +; CHECK-GI-NEXT: mov h1, v0.h[1] +; CHECK-GI-NEXT: mov h2, v0.h[2] +; CHECK-GI-NEXT: mov h3, v0.h[3] ; CHECK-GI-NEXT: fcvt d0, h0 -; CHECK-GI-NEXT: mov h3, v1.h[1] -; CHECK-GI-NEXT: fcvt d2, h2 -; CHECK-GI-NEXT: fcvt d1, h1 -; CHECK-GI-NEXT: fcvt d3, h3 -; CHECK-GI-NEXT: mov v0.d[1], v2.d[0] -; CHECK-GI-NEXT: mov v1.d[1], v3.d[0] +; CHECK-GI-NEXT: fcvt d4, h1 +; CHECK-GI-NEXT: fcvt d1, h2 +; CHECK-GI-NEXT: fcvt d2, h3 +; CHECK-GI-NEXT: mov v0.d[1], v4.d[0] +; CHECK-GI-NEXT: mov v1.d[1], v2.d[0] ; 
CHECK-GI-NEXT: ret entry: %c = fpext <4 x half> %a to <4 x double> @@ -392,20 +392,11 @@ entry: } define <2 x float> @fpext_v2f16_v2f32(<2 x half> %a) { -; CHECK-SD-LABEL: fpext_v2f16_v2f32: -; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: fcvtl v0.4s, v0.4h -; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0 -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: fpext_v2f16_v2f32: -; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-NEXT: mov h1, v0.h[1] -; CHECK-GI-NEXT: mov v0.h[1], v1.h[0] -; CHECK-GI-NEXT: fcvtl v0.4s, v0.4h -; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 -; CHECK-GI-NEXT: ret +; CHECK-LABEL: fpext_v2f16_v2f32: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcvtl v0.4s, v0.4h +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-NEXT: ret entry: %c = fpext <2 x half> %a to <2 x float> ret <2 x float> %c diff --git a/llvm/test/CodeGen/AArch64/fptoi.ll b/llvm/test/CodeGen/AArch64/fptoi.ll index f72a49f6ab7c8..c436c410a4e39 100644 --- a/llvm/test/CodeGen/AArch64/fptoi.ll +++ b/llvm/test/CodeGen/AArch64/fptoi.ll @@ -3961,9 +3961,6 @@ define <2 x i64> @fptos_v2f16_v2i64(<2 x half> %a) { ; ; CHECK-GI-NOFP16-LABEL: fptos_v2f16_v2i64: ; CHECK-GI-NOFP16: // %bb.0: // %entry -; CHECK-GI-NOFP16-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-NOFP16-NEXT: mov h1, v0.h[1] -; CHECK-GI-NOFP16-NEXT: mov v0.h[1], v1.h[0] ; CHECK-GI-NOFP16-NEXT: fcvtl v0.4s, v0.4h ; CHECK-GI-NOFP16-NEXT: fcvtl v0.2d, v0.2s ; CHECK-GI-NOFP16-NEXT: fcvtzs v0.2d, v0.2d @@ -4008,9 +4005,6 @@ define <2 x i64> @fptou_v2f16_v2i64(<2 x half> %a) { ; ; CHECK-GI-NOFP16-LABEL: fptou_v2f16_v2i64: ; CHECK-GI-NOFP16: // %bb.0: // %entry -; CHECK-GI-NOFP16-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-NOFP16-NEXT: mov h1, v0.h[1] -; CHECK-GI-NOFP16-NEXT: mov v0.h[1], v1.h[0] ; CHECK-GI-NOFP16-NEXT: fcvtl v0.4s, v0.4h ; CHECK-GI-NOFP16-NEXT: fcvtl v0.2d, v0.2s ; CHECK-GI-NOFP16-NEXT: fcvtzu v0.2d, v0.2d @@ -4207,17 +4201,17 @@ define <4 x i64> @fptos_v4f16_v4i64(<4 x half> %a) { ; CHECK-GI-FP16-LABEL: fptos_v4f16_v4i64: ; CHECK-GI-FP16: // %bb.0: // %entry ; CHECK-GI-FP16-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-FP16-NEXT: mov s1, v0.s[1] -; CHECK-GI-FP16-NEXT: mov h2, v0.h[1] +; CHECK-GI-FP16-NEXT: mov h1, v0.h[1] +; CHECK-GI-FP16-NEXT: mov h2, v0.h[2] +; CHECK-GI-FP16-NEXT: mov h3, v0.h[3] ; CHECK-GI-FP16-NEXT: fcvt d0, h0 -; CHECK-GI-FP16-NEXT: mov h3, v1.h[1] -; CHECK-GI-FP16-NEXT: fcvt d2, h2 ; CHECK-GI-FP16-NEXT: fcvt d1, h1 +; CHECK-GI-FP16-NEXT: fcvt d2, h2 ; CHECK-GI-FP16-NEXT: fcvt d3, h3 -; CHECK-GI-FP16-NEXT: mov v0.d[1], v2.d[0] -; CHECK-GI-FP16-NEXT: mov v1.d[1], v3.d[0] +; CHECK-GI-FP16-NEXT: mov v0.d[1], v1.d[0] +; CHECK-GI-FP16-NEXT: mov v2.d[1], v3.d[0] ; CHECK-GI-FP16-NEXT: fcvtzs v0.2d, v0.2d -; CHECK-GI-FP16-NEXT: fcvtzs v1.2d, v1.2d +; CHECK-GI-FP16-NEXT: fcvtzs v1.2d, v2.2d ; CHECK-GI-FP16-NEXT: ret entry: %c = fptosi <4 x half> %a to <4 x i64> @@ -4273,17 +4267,17 @@ define <4 x i64> @fptou_v4f16_v4i64(<4 x half> %a) { ; CHECK-GI-FP16-LABEL: fptou_v4f16_v4i64: ; CHECK-GI-FP16: // %bb.0: // %entry ; CHECK-GI-FP16-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-FP16-NEXT: mov s1, v0.s[1] -; CHECK-GI-FP16-NEXT: mov h2, v0.h[1] +; CHECK-GI-FP16-NEXT: mov h1, v0.h[1] +; CHECK-GI-FP16-NEXT: mov h2, v0.h[2] +; CHECK-GI-FP16-NEXT: mov h3, v0.h[3] ; CHECK-GI-FP16-NEXT: fcvt d0, h0 -; CHECK-GI-FP16-NEXT: mov h3, v1.h[1] -; CHECK-GI-FP16-NEXT: fcvt d2, h2 ; CHECK-GI-FP16-NEXT: fcvt d1, h1 +; CHECK-GI-FP16-NEXT: fcvt 
d2, h2 ; CHECK-GI-FP16-NEXT: fcvt d3, h3 -; CHECK-GI-FP16-NEXT: mov v0.d[1], v2.d[0] -; CHECK-GI-FP16-NEXT: mov v1.d[1], v3.d[0] +; CHECK-GI-FP16-NEXT: mov v0.d[1], v1.d[0] +; CHECK-GI-FP16-NEXT: mov v2.d[1], v3.d[0] ; CHECK-GI-FP16-NEXT: fcvtzu v0.2d, v0.2d -; CHECK-GI-FP16-NEXT: fcvtzu v1.2d, v1.2d +; CHECK-GI-FP16-NEXT: fcvtzu v1.2d, v2.2d ; CHECK-GI-FP16-NEXT: ret entry: %c = fptoui <4 x half> %a to <4 x i64> @@ -4369,29 +4363,29 @@ define <8 x i64> @fptos_v8f16_v8i64(<8 x half> %a) { ; ; CHECK-GI-FP16-LABEL: fptos_v8f16_v8i64: ; CHECK-GI-FP16: // %bb.0: // %entry -; CHECK-GI-FP16-NEXT: mov s1, v0.s[1] -; CHECK-GI-FP16-NEXT: mov s2, v0.s[2] -; CHECK-GI-FP16-NEXT: mov s3, v0.s[3] -; CHECK-GI-FP16-NEXT: mov h4, v0.h[1] +; CHECK-GI-FP16-NEXT: mov h1, v0.h[1] +; CHECK-GI-FP16-NEXT: mov h2, v0.h[2] +; CHECK-GI-FP16-NEXT: mov h3, v0.h[3] +; CHECK-GI-FP16-NEXT: mov h4, v0.h[4] +; CHECK-GI-FP16-NEXT: mov h5, v0.h[5] +; CHECK-GI-FP16-NEXT: mov h6, v0.h[6] +; CHECK-GI-FP16-NEXT: mov h7, v0.h[7] ; CHECK-GI-FP16-NEXT: fcvt d0, h0 -; CHECK-GI-FP16-NEXT: mov h5, v1.h[1] -; CHECK-GI-FP16-NEXT: mov h6, v2.h[1] -; CHECK-GI-FP16-NEXT: mov h7, v3.h[1] -; CHECK-GI-FP16-NEXT: fcvt d4, h4 ; CHECK-GI-FP16-NEXT: fcvt d1, h1 ; CHECK-GI-FP16-NEXT: fcvt d2, h2 ; CHECK-GI-FP16-NEXT: fcvt d3, h3 +; CHECK-GI-FP16-NEXT: fcvt d4, h4 ; CHECK-GI-FP16-NEXT: fcvt d5, h5 ; CHECK-GI-FP16-NEXT: fcvt d6, h6 ; CHECK-GI-FP16-NEXT: fcvt d7, h7 -; CHECK-GI-FP16-NEXT: mov v0.d[1], v4.d[0] -; CHECK-GI-FP16-NEXT: mov v1.d[1], v5.d[0] -; CHECK-GI-FP16-NEXT: mov v2.d[1], v6.d[0] -; CHECK-GI-FP16-NEXT: mov v3.d[1], v7.d[0] +; CHECK-GI-FP16-NEXT: mov v0.d[1], v1.d[0] +; CHECK-GI-FP16-NEXT: mov v2.d[1], v3.d[0] +; CHECK-GI-FP16-NEXT: mov v4.d[1], v5.d[0] +; CHECK-GI-FP16-NEXT: mov v6.d[1], v7.d[0] ; CHECK-GI-FP16-NEXT: fcvtzs v0.2d, v0.2d -; CHECK-GI-FP16-NEXT: fcvtzs v1.2d, v1.2d -; CHECK-GI-FP16-NEXT: fcvtzs v2.2d, v2.2d -; CHECK-GI-FP16-NEXT: fcvtzs v3.2d, v3.2d +; CHECK-GI-FP16-NEXT: fcvtzs v1.2d, v2.2d +; CHECK-GI-FP16-NEXT: fcvtzs v2.2d, v4.2d +; CHECK-GI-FP16-NEXT: fcvtzs v3.2d, v6.2d ; CHECK-GI-FP16-NEXT: ret entry: %c = fptosi <8 x half> %a to <8 x i64> @@ -4477,29 +4471,29 @@ define <8 x i64> @fptou_v8f16_v8i64(<8 x half> %a) { ; ; CHECK-GI-FP16-LABEL: fptou_v8f16_v8i64: ; CHECK-GI-FP16: // %bb.0: // %entry -; CHECK-GI-FP16-NEXT: mov s1, v0.s[1] -; CHECK-GI-FP16-NEXT: mov s2, v0.s[2] -; CHECK-GI-FP16-NEXT: mov s3, v0.s[3] -; CHECK-GI-FP16-NEXT: mov h4, v0.h[1] +; CHECK-GI-FP16-NEXT: mov h1, v0.h[1] +; CHECK-GI-FP16-NEXT: mov h2, v0.h[2] +; CHECK-GI-FP16-NEXT: mov h3, v0.h[3] +; CHECK-GI-FP16-NEXT: mov h4, v0.h[4] +; CHECK-GI-FP16-NEXT: mov h5, v0.h[5] +; CHECK-GI-FP16-NEXT: mov h6, v0.h[6] +; CHECK-GI-FP16-NEXT: mov h7, v0.h[7] ; CHECK-GI-FP16-NEXT: fcvt d0, h0 -; CHECK-GI-FP16-NEXT: mov h5, v1.h[1] -; CHECK-GI-FP16-NEXT: mov h6, v2.h[1] -; CHECK-GI-FP16-NEXT: mov h7, v3.h[1] -; CHECK-GI-FP16-NEXT: fcvt d4, h4 ; CHECK-GI-FP16-NEXT: fcvt d1, h1 ; CHECK-GI-FP16-NEXT: fcvt d2, h2 ; CHECK-GI-FP16-NEXT: fcvt d3, h3 +; CHECK-GI-FP16-NEXT: fcvt d4, h4 ; CHECK-GI-FP16-NEXT: fcvt d5, h5 ; CHECK-GI-FP16-NEXT: fcvt d6, h6 ; CHECK-GI-FP16-NEXT: fcvt d7, h7 -; CHECK-GI-FP16-NEXT: mov v0.d[1], v4.d[0] -; CHECK-GI-FP16-NEXT: mov v1.d[1], v5.d[0] -; CHECK-GI-FP16-NEXT: mov v2.d[1], v6.d[0] -; CHECK-GI-FP16-NEXT: mov v3.d[1], v7.d[0] +; CHECK-GI-FP16-NEXT: mov v0.d[1], v1.d[0] +; CHECK-GI-FP16-NEXT: mov v2.d[1], v3.d[0] +; CHECK-GI-FP16-NEXT: mov v4.d[1], v5.d[0] +; CHECK-GI-FP16-NEXT: mov v6.d[1], v7.d[0] ; CHECK-GI-FP16-NEXT: fcvtzu v0.2d, 
v0.2d -; CHECK-GI-FP16-NEXT: fcvtzu v1.2d, v1.2d -; CHECK-GI-FP16-NEXT: fcvtzu v2.2d, v2.2d -; CHECK-GI-FP16-NEXT: fcvtzu v3.2d, v3.2d +; CHECK-GI-FP16-NEXT: fcvtzu v1.2d, v2.2d +; CHECK-GI-FP16-NEXT: fcvtzu v2.2d, v4.2d +; CHECK-GI-FP16-NEXT: fcvtzu v3.2d, v6.2d ; CHECK-GI-FP16-NEXT: ret entry: %c = fptoui <8 x half> %a to <8 x i64> @@ -5708,9 +5702,6 @@ define <2 x i32> @fptos_v2f16_v2i32(<2 x half> %a) { ; ; CHECK-GI-LABEL: fptos_v2f16_v2i32: ; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-NEXT: mov h1, v0.h[1] -; CHECK-GI-NEXT: mov v0.h[1], v1.h[0] ; CHECK-GI-NEXT: fcvtl v0.4s, v0.4h ; CHECK-GI-NEXT: fcvtzs v0.2s, v0.2s ; CHECK-GI-NEXT: ret @@ -5729,9 +5720,6 @@ define <2 x i32> @fptou_v2f16_v2i32(<2 x half> %a) { ; ; CHECK-GI-LABEL: fptou_v2f16_v2i32: ; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-NEXT: mov h1, v0.h[1] -; CHECK-GI-NEXT: mov v0.h[1], v1.h[0] ; CHECK-GI-NEXT: fcvtl v0.4s, v0.4h ; CHECK-GI-NEXT: fcvtzu v0.2s, v0.2s ; CHECK-GI-NEXT: ret @@ -5984,21 +5972,13 @@ define <2 x i16> @fptos_v2f16_v2i16(<2 x half> %a) { ; ; CHECK-GI-NOFP16-LABEL: fptos_v2f16_v2i16: ; CHECK-GI-NOFP16: // %bb.0: // %entry -; CHECK-GI-NOFP16-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-NOFP16-NEXT: mov h1, v0.h[1] -; CHECK-GI-NOFP16-NEXT: mov v0.h[1], v1.h[0] ; CHECK-GI-NOFP16-NEXT: fcvtl v0.4s, v0.4h ; CHECK-GI-NOFP16-NEXT: fcvtzs v0.2s, v0.2s ; CHECK-GI-NOFP16-NEXT: ret ; ; CHECK-GI-FP16-LABEL: fptos_v2f16_v2i16: ; CHECK-GI-FP16: // %bb.0: // %entry -; CHECK-GI-FP16-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-FP16-NEXT: mov h1, v0.h[1] -; CHECK-GI-FP16-NEXT: mov v0.h[1], v1.h[0] ; CHECK-GI-FP16-NEXT: fcvtzs v0.4h, v0.4h -; CHECK-GI-FP16-NEXT: mov h1, v0.h[1] -; CHECK-GI-FP16-NEXT: mov v0.h[1], v1.h[0] ; CHECK-GI-FP16-NEXT: ushll v0.4s, v0.4h, #0 ; CHECK-GI-FP16-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-GI-FP16-NEXT: ret @@ -6017,21 +5997,13 @@ define <2 x i16> @fptou_v2f16_v2i16(<2 x half> %a) { ; ; CHECK-GI-NOFP16-LABEL: fptou_v2f16_v2i16: ; CHECK-GI-NOFP16: // %bb.0: // %entry -; CHECK-GI-NOFP16-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-NOFP16-NEXT: mov h1, v0.h[1] -; CHECK-GI-NOFP16-NEXT: mov v0.h[1], v1.h[0] ; CHECK-GI-NOFP16-NEXT: fcvtl v0.4s, v0.4h ; CHECK-GI-NOFP16-NEXT: fcvtzu v0.2s, v0.2s ; CHECK-GI-NOFP16-NEXT: ret ; ; CHECK-GI-FP16-LABEL: fptou_v2f16_v2i16: ; CHECK-GI-FP16: // %bb.0: // %entry -; CHECK-GI-FP16-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-FP16-NEXT: mov h1, v0.h[1] -; CHECK-GI-FP16-NEXT: mov v0.h[1], v1.h[0] ; CHECK-GI-FP16-NEXT: fcvtzu v0.4h, v0.4h -; CHECK-GI-FP16-NEXT: mov h1, v0.h[1] -; CHECK-GI-FP16-NEXT: mov v0.h[1], v1.h[0] ; CHECK-GI-FP16-NEXT: ushll v0.4s, v0.4h, #0 ; CHECK-GI-FP16-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-GI-FP16-NEXT: ret @@ -6460,21 +6432,13 @@ define <2 x i8> @fptos_v2f16_v2i8(<2 x half> %a) { ; ; CHECK-GI-NOFP16-LABEL: fptos_v2f16_v2i8: ; CHECK-GI-NOFP16: // %bb.0: // %entry -; CHECK-GI-NOFP16-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-NOFP16-NEXT: mov h1, v0.h[1] -; CHECK-GI-NOFP16-NEXT: mov v0.h[1], v1.h[0] ; CHECK-GI-NOFP16-NEXT: fcvtl v0.4s, v0.4h ; CHECK-GI-NOFP16-NEXT: fcvtzs v0.2s, v0.2s ; CHECK-GI-NOFP16-NEXT: ret ; ; CHECK-GI-FP16-LABEL: fptos_v2f16_v2i8: ; CHECK-GI-FP16: // %bb.0: // %entry -; CHECK-GI-FP16-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-FP16-NEXT: mov h1, v0.h[1] -; CHECK-GI-FP16-NEXT: mov v0.h[1], v1.h[0] ; CHECK-GI-FP16-NEXT: fcvtzs 
v0.4h, v0.4h -; CHECK-GI-FP16-NEXT: mov h1, v0.h[1] -; CHECK-GI-FP16-NEXT: mov v0.h[1], v1.h[0] ; CHECK-GI-FP16-NEXT: ushll v0.4s, v0.4h, #0 ; CHECK-GI-FP16-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-GI-FP16-NEXT: ret @@ -6493,21 +6457,13 @@ define <2 x i8> @fptou_v2f16_v2i8(<2 x half> %a) { ; ; CHECK-GI-NOFP16-LABEL: fptou_v2f16_v2i8: ; CHECK-GI-NOFP16: // %bb.0: // %entry -; CHECK-GI-NOFP16-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-NOFP16-NEXT: mov h1, v0.h[1] -; CHECK-GI-NOFP16-NEXT: mov v0.h[1], v1.h[0] ; CHECK-GI-NOFP16-NEXT: fcvtl v0.4s, v0.4h ; CHECK-GI-NOFP16-NEXT: fcvtzu v0.2s, v0.2s ; CHECK-GI-NOFP16-NEXT: ret ; ; CHECK-GI-FP16-LABEL: fptou_v2f16_v2i8: ; CHECK-GI-FP16: // %bb.0: // %entry -; CHECK-GI-FP16-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-FP16-NEXT: mov h1, v0.h[1] -; CHECK-GI-FP16-NEXT: mov v0.h[1], v1.h[0] ; CHECK-GI-FP16-NEXT: fcvtzu v0.4h, v0.4h -; CHECK-GI-FP16-NEXT: mov h1, v0.h[1] -; CHECK-GI-FP16-NEXT: mov v0.h[1], v1.h[0] ; CHECK-GI-FP16-NEXT: ushll v0.4s, v0.4h, #0 ; CHECK-GI-FP16-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-GI-FP16-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll b/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll index ed7814938da25..2d568e858c36b 100644 --- a/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll +++ b/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll @@ -1270,9 +1270,6 @@ define <2 x i32> @test_signed_v2f16_v2i32(<2 x half> %f) { ; ; CHECK-GI-LABEL: test_signed_v2f16_v2i32: ; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-NEXT: mov h1, v0.h[1] -; CHECK-GI-NEXT: mov v0.h[1], v1.h[0] ; CHECK-GI-NEXT: fcvtl v0.4s, v0.4h ; CHECK-GI-NEXT: fcvtzs v0.2s, v0.2s ; CHECK-GI-NEXT: ret @@ -3301,17 +3298,17 @@ define <4 x i64> @test_signed_v4f16_v4i64(<4 x half> %f) { ; CHECK-GI-FP16-LABEL: test_signed_v4f16_v4i64: ; CHECK-GI-FP16: // %bb.0: ; CHECK-GI-FP16-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-FP16-NEXT: mov s1, v0.s[1] -; CHECK-GI-FP16-NEXT: mov h2, v0.h[1] +; CHECK-GI-FP16-NEXT: mov h1, v0.h[1] +; CHECK-GI-FP16-NEXT: mov h2, v0.h[2] +; CHECK-GI-FP16-NEXT: mov h3, v0.h[3] ; CHECK-GI-FP16-NEXT: fcvt d0, h0 -; CHECK-GI-FP16-NEXT: mov h3, v1.h[1] -; CHECK-GI-FP16-NEXT: fcvt d2, h2 ; CHECK-GI-FP16-NEXT: fcvt d1, h1 +; CHECK-GI-FP16-NEXT: fcvt d2, h2 ; CHECK-GI-FP16-NEXT: fcvt d3, h3 -; CHECK-GI-FP16-NEXT: mov v0.d[1], v2.d[0] -; CHECK-GI-FP16-NEXT: mov v1.d[1], v3.d[0] +; CHECK-GI-FP16-NEXT: mov v0.d[1], v1.d[0] +; CHECK-GI-FP16-NEXT: mov v2.d[1], v3.d[0] ; CHECK-GI-FP16-NEXT: fcvtzs v0.2d, v0.2d -; CHECK-GI-FP16-NEXT: fcvtzs v1.2d, v1.2d +; CHECK-GI-FP16-NEXT: fcvtzs v1.2d, v2.2d ; CHECK-GI-FP16-NEXT: ret %x = call <4 x i64> @llvm.fptosi.sat.v4f16.v4i64(<4 x half> %f) ret <4 x i64> %x @@ -4167,29 +4164,29 @@ define <8 x i64> @test_signed_v8f16_v8i64(<8 x half> %f) { ; ; CHECK-GI-FP16-LABEL: test_signed_v8f16_v8i64: ; CHECK-GI-FP16: // %bb.0: -; CHECK-GI-FP16-NEXT: mov s1, v0.s[1] -; CHECK-GI-FP16-NEXT: mov s2, v0.s[2] -; CHECK-GI-FP16-NEXT: mov s3, v0.s[3] -; CHECK-GI-FP16-NEXT: mov h4, v0.h[1] +; CHECK-GI-FP16-NEXT: mov h1, v0.h[1] +; CHECK-GI-FP16-NEXT: mov h2, v0.h[2] +; CHECK-GI-FP16-NEXT: mov h3, v0.h[3] +; CHECK-GI-FP16-NEXT: mov h4, v0.h[4] +; CHECK-GI-FP16-NEXT: mov h5, v0.h[5] +; CHECK-GI-FP16-NEXT: mov h6, v0.h[6] +; CHECK-GI-FP16-NEXT: mov h7, v0.h[7] ; CHECK-GI-FP16-NEXT: fcvt d0, h0 -; CHECK-GI-FP16-NEXT: mov h5, v1.h[1] -; CHECK-GI-FP16-NEXT: mov h6, v2.h[1] -; CHECK-GI-FP16-NEXT: mov h7, v3.h[1] -; CHECK-GI-FP16-NEXT: 
fcvt d4, h4 ; CHECK-GI-FP16-NEXT: fcvt d1, h1 ; CHECK-GI-FP16-NEXT: fcvt d2, h2 ; CHECK-GI-FP16-NEXT: fcvt d3, h3 +; CHECK-GI-FP16-NEXT: fcvt d4, h4 ; CHECK-GI-FP16-NEXT: fcvt d5, h5 ; CHECK-GI-FP16-NEXT: fcvt d6, h6 ; CHECK-GI-FP16-NEXT: fcvt d7, h7 -; CHECK-GI-FP16-NEXT: mov v0.d[1], v4.d[0] -; CHECK-GI-FP16-NEXT: mov v1.d[1], v5.d[0] -; CHECK-GI-FP16-NEXT: mov v2.d[1], v6.d[0] -; CHECK-GI-FP16-NEXT: mov v3.d[1], v7.d[0] +; CHECK-GI-FP16-NEXT: mov v0.d[1], v1.d[0] +; CHECK-GI-FP16-NEXT: mov v2.d[1], v3.d[0] +; CHECK-GI-FP16-NEXT: mov v4.d[1], v5.d[0] +; CHECK-GI-FP16-NEXT: mov v6.d[1], v7.d[0] ; CHECK-GI-FP16-NEXT: fcvtzs v0.2d, v0.2d -; CHECK-GI-FP16-NEXT: fcvtzs v1.2d, v1.2d -; CHECK-GI-FP16-NEXT: fcvtzs v2.2d, v2.2d -; CHECK-GI-FP16-NEXT: fcvtzs v3.2d, v3.2d +; CHECK-GI-FP16-NEXT: fcvtzs v1.2d, v2.2d +; CHECK-GI-FP16-NEXT: fcvtzs v2.2d, v4.2d +; CHECK-GI-FP16-NEXT: fcvtzs v3.2d, v6.2d ; CHECK-GI-FP16-NEXT: ret %x = call <8 x i64> @llvm.fptosi.sat.v8f16.v8i64(<8 x half> %f) ret <8 x i64> %x diff --git a/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll b/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll index 40a865338cd85..f63fba9dab6c6 100644 --- a/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll +++ b/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll @@ -1098,9 +1098,6 @@ define <2 x i32> @test_unsigned_v2f16_v2i32(<2 x half> %f) { ; ; CHECK-GI-LABEL: test_unsigned_v2f16_v2i32: ; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-NEXT: mov h1, v0.h[1] -; CHECK-GI-NEXT: mov v0.h[1], v1.h[0] ; CHECK-GI-NEXT: fcvtl v0.4s, v0.4h ; CHECK-GI-NEXT: fcvtzu v0.2s, v0.2s ; CHECK-GI-NEXT: ret @@ -2711,17 +2708,17 @@ define <4 x i64> @test_unsigned_v4f16_v4i64(<4 x half> %f) { ; CHECK-GI-FP16-LABEL: test_unsigned_v4f16_v4i64: ; CHECK-GI-FP16: // %bb.0: ; CHECK-GI-FP16-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-FP16-NEXT: mov s1, v0.s[1] -; CHECK-GI-FP16-NEXT: mov h2, v0.h[1] +; CHECK-GI-FP16-NEXT: mov h1, v0.h[1] +; CHECK-GI-FP16-NEXT: mov h2, v0.h[2] +; CHECK-GI-FP16-NEXT: mov h3, v0.h[3] ; CHECK-GI-FP16-NEXT: fcvt d0, h0 -; CHECK-GI-FP16-NEXT: mov h3, v1.h[1] -; CHECK-GI-FP16-NEXT: fcvt d2, h2 ; CHECK-GI-FP16-NEXT: fcvt d1, h1 +; CHECK-GI-FP16-NEXT: fcvt d2, h2 ; CHECK-GI-FP16-NEXT: fcvt d3, h3 -; CHECK-GI-FP16-NEXT: mov v0.d[1], v2.d[0] -; CHECK-GI-FP16-NEXT: mov v1.d[1], v3.d[0] +; CHECK-GI-FP16-NEXT: mov v0.d[1], v1.d[0] +; CHECK-GI-FP16-NEXT: mov v2.d[1], v3.d[0] ; CHECK-GI-FP16-NEXT: fcvtzu v0.2d, v0.2d -; CHECK-GI-FP16-NEXT: fcvtzu v1.2d, v1.2d +; CHECK-GI-FP16-NEXT: fcvtzu v1.2d, v2.2d ; CHECK-GI-FP16-NEXT: ret %x = call <4 x i64> @llvm.fptoui.sat.v4f16.v4i64(<4 x half> %f) ret <4 x i64> %x @@ -3433,29 +3430,29 @@ define <8 x i64> @test_unsigned_v8f16_v8i64(<8 x half> %f) { ; ; CHECK-GI-FP16-LABEL: test_unsigned_v8f16_v8i64: ; CHECK-GI-FP16: // %bb.0: -; CHECK-GI-FP16-NEXT: mov s1, v0.s[1] -; CHECK-GI-FP16-NEXT: mov s2, v0.s[2] -; CHECK-GI-FP16-NEXT: mov s3, v0.s[3] -; CHECK-GI-FP16-NEXT: mov h4, v0.h[1] +; CHECK-GI-FP16-NEXT: mov h1, v0.h[1] +; CHECK-GI-FP16-NEXT: mov h2, v0.h[2] +; CHECK-GI-FP16-NEXT: mov h3, v0.h[3] +; CHECK-GI-FP16-NEXT: mov h4, v0.h[4] +; CHECK-GI-FP16-NEXT: mov h5, v0.h[5] +; CHECK-GI-FP16-NEXT: mov h6, v0.h[6] +; CHECK-GI-FP16-NEXT: mov h7, v0.h[7] ; CHECK-GI-FP16-NEXT: fcvt d0, h0 -; CHECK-GI-FP16-NEXT: mov h5, v1.h[1] -; CHECK-GI-FP16-NEXT: mov h6, v2.h[1] -; CHECK-GI-FP16-NEXT: mov h7, v3.h[1] -; CHECK-GI-FP16-NEXT: fcvt d4, h4 ; CHECK-GI-FP16-NEXT: fcvt d1, h1 ; CHECK-GI-FP16-NEXT: fcvt d2, h2 ; CHECK-GI-FP16-NEXT: fcvt d3, 
h3 +; CHECK-GI-FP16-NEXT: fcvt d4, h4 ; CHECK-GI-FP16-NEXT: fcvt d5, h5 ; CHECK-GI-FP16-NEXT: fcvt d6, h6 ; CHECK-GI-FP16-NEXT: fcvt d7, h7 -; CHECK-GI-FP16-NEXT: mov v0.d[1], v4.d[0] -; CHECK-GI-FP16-NEXT: mov v1.d[1], v5.d[0] -; CHECK-GI-FP16-NEXT: mov v2.d[1], v6.d[0] -; CHECK-GI-FP16-NEXT: mov v3.d[1], v7.d[0] +; CHECK-GI-FP16-NEXT: mov v0.d[1], v1.d[0] +; CHECK-GI-FP16-NEXT: mov v2.d[1], v3.d[0] +; CHECK-GI-FP16-NEXT: mov v4.d[1], v5.d[0] +; CHECK-GI-FP16-NEXT: mov v6.d[1], v7.d[0] ; CHECK-GI-FP16-NEXT: fcvtzu v0.2d, v0.2d -; CHECK-GI-FP16-NEXT: fcvtzu v1.2d, v1.2d -; CHECK-GI-FP16-NEXT: fcvtzu v2.2d, v2.2d -; CHECK-GI-FP16-NEXT: fcvtzu v3.2d, v3.2d +; CHECK-GI-FP16-NEXT: fcvtzu v1.2d, v2.2d +; CHECK-GI-FP16-NEXT: fcvtzu v2.2d, v4.2d +; CHECK-GI-FP16-NEXT: fcvtzu v3.2d, v6.2d ; CHECK-GI-FP16-NEXT: ret %x = call <8 x i64> @llvm.fptoui.sat.v8f16.v8i64(<8 x half> %f) ret <8 x i64> %x diff --git a/llvm/test/CodeGen/AArch64/fptrunc.ll b/llvm/test/CodeGen/AArch64/fptrunc.ll index c0d4ddef23132..2187717c4148a 100644 --- a/llvm/test/CodeGen/AArch64/fptrunc.ll +++ b/llvm/test/CodeGen/AArch64/fptrunc.ll @@ -366,9 +366,6 @@ define <2 x half> @fptrunc_v2f32_v2f16(<2 x float> %a) { ; CHECK-GI-NEXT: mov v1.s[0], v0.s[0] ; CHECK-GI-NEXT: mov v1.s[1], v0.s[1] ; CHECK-GI-NEXT: fcvtn v0.4h, v1.4s -; CHECK-GI-NEXT: mov h1, v0.h[1] -; CHECK-GI-NEXT: mov v0.h[1], v1.h[0] -; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-GI-NEXT: ret entry: %c = fptrunc <2 x float> %a to <2 x half> diff --git a/llvm/test/CodeGen/AArch64/itofp.ll b/llvm/test/CodeGen/AArch64/itofp.ll index f70ec0f35cb58..c5bde81ba4a5e 100644 --- a/llvm/test/CodeGen/AArch64/itofp.ll +++ b/llvm/test/CodeGen/AArch64/itofp.ll @@ -3313,24 +3313,17 @@ define <3 x double> @stofp_v3i8_v3f64(<3 x i8> %a) { ; CHECK-GI-NEXT: mov v0.h[2], w2 ; CHECK-GI-NEXT: shl v0.4h, v0.4h, #8 ; CHECK-GI-NEXT: sshr v0.4h, v0.4h, #8 -; CHECK-GI-NEXT: mov s1, v0.s[1] -; CHECK-GI-NEXT: mov h2, v0.h[1] -; CHECK-GI-NEXT: mov v0.h[1], v2.h[0] -; CHECK-GI-NEXT: mov h2, v1.h[1] -; CHECK-GI-NEXT: sshll v0.4s, v0.4h, #0 -; CHECK-GI-NEXT: mov v1.h[1], v2.h[0] -; CHECK-GI-NEXT: smov x8, v0.s[0] -; CHECK-GI-NEXT: smov x9, v0.s[1] -; CHECK-GI-NEXT: sshll v1.4s, v1.4h, #0 -; CHECK-GI-NEXT: mov v0.d[0], x8 -; CHECK-GI-NEXT: smov x8, v1.s[0] -; CHECK-GI-NEXT: mov v0.d[1], x9 -; CHECK-GI-NEXT: smov x9, v1.s[1] +; CHECK-GI-NEXT: smov x8, v0.h[0] +; CHECK-GI-NEXT: smov x9, v0.h[1] ; CHECK-GI-NEXT: mov v1.d[0], x8 +; CHECK-GI-NEXT: smov x8, v0.h[2] ; CHECK-GI-NEXT: mov v1.d[1], x9 -; CHECK-GI-NEXT: scvtf v0.2d, v0.2d -; CHECK-GI-NEXT: scvtf v2.2d, v1.2d +; CHECK-GI-NEXT: smov x9, v0.h[3] +; CHECK-GI-NEXT: mov v2.d[0], x8 +; CHECK-GI-NEXT: scvtf v0.2d, v1.2d +; CHECK-GI-NEXT: mov v2.d[1], x9 ; CHECK-GI-NEXT: mov d1, v0.d[1] +; CHECK-GI-NEXT: scvtf v2.2d, v2.2d ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-GI-NEXT: // kill: def $d2 killed $d2 killed $q2 ; CHECK-GI-NEXT: ret @@ -3365,24 +3358,17 @@ define <3 x double> @utofp_v3i8_v3f64(<3 x i8> %a) { ; CHECK-GI-NEXT: mov v0.h[1], w1 ; CHECK-GI-NEXT: mov v0.h[2], w2 ; CHECK-GI-NEXT: and v0.8b, v0.8b, v1.8b -; CHECK-GI-NEXT: mov s1, v0.s[1] -; CHECK-GI-NEXT: mov h2, v0.h[1] -; CHECK-GI-NEXT: mov v0.h[1], v2.h[0] -; CHECK-GI-NEXT: mov h2, v1.h[1] -; CHECK-GI-NEXT: ushll v0.4s, v0.4h, #0 -; CHECK-GI-NEXT: mov v1.h[1], v2.h[0] -; CHECK-GI-NEXT: mov w8, v0.s[0] -; CHECK-GI-NEXT: mov w9, v0.s[1] -; CHECK-GI-NEXT: ushll v1.4s, v1.4h, #0 -; CHECK-GI-NEXT: mov v0.d[0], x8 -; CHECK-GI-NEXT: mov w8, v1.s[0] -; 
CHECK-GI-NEXT: mov v0.d[1], x9 -; CHECK-GI-NEXT: mov w9, v1.s[1] +; CHECK-GI-NEXT: umov w8, v0.h[0] +; CHECK-GI-NEXT: umov w9, v0.h[1] ; CHECK-GI-NEXT: mov v1.d[0], x8 +; CHECK-GI-NEXT: umov w8, v0.h[2] ; CHECK-GI-NEXT: mov v1.d[1], x9 -; CHECK-GI-NEXT: ucvtf v0.2d, v0.2d -; CHECK-GI-NEXT: ucvtf v2.2d, v1.2d +; CHECK-GI-NEXT: umov w9, v0.h[3] +; CHECK-GI-NEXT: mov v2.d[0], x8 +; CHECK-GI-NEXT: ucvtf v0.2d, v1.2d +; CHECK-GI-NEXT: mov v2.d[1], x9 ; CHECK-GI-NEXT: mov d1, v0.d[1] +; CHECK-GI-NEXT: ucvtf v2.2d, v2.2d ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-GI-NEXT: // kill: def $d2 killed $d2 killed $q2 ; CHECK-GI-NEXT: ret @@ -5267,11 +5253,8 @@ define <3 x float> @stofp_v3i8_v3f32(<3 x i8> %a) { ; CHECK-GI-NEXT: mov v0.h[2], w2 ; CHECK-GI-NEXT: shl v0.4h, v0.4h, #8 ; CHECK-GI-NEXT: sshr v0.4h, v0.4h, #8 -; CHECK-GI-NEXT: mov s1, v0.s[1] -; CHECK-GI-NEXT: mov h2, v0.h[1] -; CHECK-GI-NEXT: mov h3, v1.h[1] -; CHECK-GI-NEXT: mov v0.h[1], v2.h[0] -; CHECK-GI-NEXT: mov v1.h[1], v3.h[0] +; CHECK-GI-NEXT: mov v1.h[0], v0.h[2] +; CHECK-GI-NEXT: mov v1.h[1], v0.h[3] ; CHECK-GI-NEXT: sshll v0.4s, v0.4h, #0 ; CHECK-GI-NEXT: sshll v1.4s, v1.4h, #0 ; CHECK-GI-NEXT: mov v0.d[1], v1.d[0] @@ -5300,11 +5283,8 @@ define <3 x float> @utofp_v3i8_v3f32(<3 x i8> %a) { ; CHECK-GI-NEXT: mov v0.h[1], w1 ; CHECK-GI-NEXT: mov v0.h[2], w2 ; CHECK-GI-NEXT: and v0.8b, v0.8b, v1.8b -; CHECK-GI-NEXT: mov s1, v0.s[1] -; CHECK-GI-NEXT: mov h2, v0.h[1] -; CHECK-GI-NEXT: mov h3, v1.h[1] -; CHECK-GI-NEXT: mov v0.h[1], v2.h[0] -; CHECK-GI-NEXT: mov v1.h[1], v3.h[0] +; CHECK-GI-NEXT: mov v1.h[0], v0.h[2] +; CHECK-GI-NEXT: mov v1.h[1], v0.h[3] ; CHECK-GI-NEXT: ushll v0.4s, v0.4h, #0 ; CHECK-GI-NEXT: ushll v1.4s, v1.4h, #0 ; CHECK-GI-NEXT: mov v0.d[1], v1.d[0] @@ -6222,9 +6202,6 @@ define <2 x half> @stofp_v2i64_v2f16(<2 x i64> %a) { ; CHECK-GI-NOFP16-NEXT: mov v1.s[0], v0.s[0] ; CHECK-GI-NOFP16-NEXT: mov v1.s[1], v0.s[1] ; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v1.4s -; CHECK-GI-NOFP16-NEXT: mov h1, v0.h[1] -; CHECK-GI-NOFP16-NEXT: mov v0.h[1], v1.h[0] -; CHECK-GI-NOFP16-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-GI-NOFP16-NEXT: ret ; ; CHECK-GI-FP16-LABEL: stofp_v2i64_v2f16: @@ -6271,9 +6248,6 @@ define <2 x half> @utofp_v2i64_v2f16(<2 x i64> %a) { ; CHECK-GI-NOFP16-NEXT: mov v1.s[0], v0.s[0] ; CHECK-GI-NOFP16-NEXT: mov v1.s[1], v0.s[1] ; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v1.4s -; CHECK-GI-NOFP16-NEXT: mov h1, v0.h[1] -; CHECK-GI-NOFP16-NEXT: mov v0.h[1], v1.h[0] -; CHECK-GI-NOFP16-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-GI-NOFP16-NEXT: ret ; ; CHECK-GI-FP16-LABEL: utofp_v2i64_v2f16: @@ -7210,9 +7184,6 @@ define <2 x half> @stofp_v2i32_v2f16(<2 x i32> %a) { ; CHECK-GI-NEXT: mov v1.s[0], v0.s[0] ; CHECK-GI-NEXT: mov v1.s[1], v0.s[1] ; CHECK-GI-NEXT: fcvtn v0.4h, v1.4s -; CHECK-GI-NEXT: mov h1, v0.h[1] -; CHECK-GI-NEXT: mov v0.h[1], v1.h[0] -; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-GI-NEXT: ret entry: %c = sitofp <2 x i32> %a to <2 x half> @@ -7233,9 +7204,6 @@ define <2 x half> @utofp_v2i32_v2f16(<2 x i32> %a) { ; CHECK-GI-NEXT: mov v1.s[0], v0.s[0] ; CHECK-GI-NEXT: mov v1.s[1], v0.s[1] ; CHECK-GI-NEXT: fcvtn v0.4h, v1.4s -; CHECK-GI-NEXT: mov h1, v0.h[1] -; CHECK-GI-NEXT: mov v0.h[1], v1.h[0] -; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-GI-NEXT: ret entry: %c = uitofp <2 x i32> %a to <2 x half> @@ -7443,18 +7411,12 @@ define <2 x half> @stofp_v2i16_v2f16(<2 x i16> %a) { ; CHECK-GI-NOFP16-NEXT: mov v1.s[0], v0.s[0] ; CHECK-GI-NOFP16-NEXT: mov 
v1.s[1], v0.s[1] ; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v1.4s -; CHECK-GI-NOFP16-NEXT: mov h1, v0.h[1] -; CHECK-GI-NOFP16-NEXT: mov v0.h[1], v1.h[0] -; CHECK-GI-NOFP16-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-GI-NOFP16-NEXT: ret ; ; CHECK-GI-FP16-LABEL: stofp_v2i16_v2f16: ; CHECK-GI-FP16: // %bb.0: // %entry ; CHECK-GI-FP16-NEXT: uzp1 v0.4h, v0.4h, v0.4h ; CHECK-GI-FP16-NEXT: scvtf v0.4h, v0.4h -; CHECK-GI-FP16-NEXT: mov h1, v0.h[1] -; CHECK-GI-FP16-NEXT: mov v0.h[1], v1.h[0] -; CHECK-GI-FP16-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-GI-FP16-NEXT: ret entry: %c = sitofp <2 x i16> %a to <2 x half> @@ -7484,18 +7446,12 @@ define <2 x half> @utofp_v2i16_v2f16(<2 x i16> %a) { ; CHECK-GI-NOFP16-NEXT: mov v1.s[0], v0.s[0] ; CHECK-GI-NOFP16-NEXT: mov v1.s[1], v0.s[1] ; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v1.4s -; CHECK-GI-NOFP16-NEXT: mov h1, v0.h[1] -; CHECK-GI-NOFP16-NEXT: mov v0.h[1], v1.h[0] -; CHECK-GI-NOFP16-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-GI-NOFP16-NEXT: ret ; ; CHECK-GI-FP16-LABEL: utofp_v2i16_v2f16: ; CHECK-GI-FP16: // %bb.0: // %entry ; CHECK-GI-FP16-NEXT: uzp1 v0.4h, v0.4h, v0.4h ; CHECK-GI-FP16-NEXT: ucvtf v0.4h, v0.4h -; CHECK-GI-FP16-NEXT: mov h1, v0.h[1] -; CHECK-GI-FP16-NEXT: mov v0.h[1], v1.h[0] -; CHECK-GI-FP16-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-GI-FP16-NEXT: ret entry: %c = uitofp <2 x i16> %a to <2 x half> @@ -7977,9 +7933,6 @@ define <2 x half> @stofp_v2i8_v2f16(<2 x i8> %a) { ; CHECK-GI-NOFP16-NEXT: mov v1.s[0], v0.s[0] ; CHECK-GI-NOFP16-NEXT: mov v1.s[1], v0.s[1] ; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v1.4s -; CHECK-GI-NOFP16-NEXT: mov h1, v0.h[1] -; CHECK-GI-NOFP16-NEXT: mov v0.h[1], v1.h[0] -; CHECK-GI-NOFP16-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-GI-NOFP16-NEXT: ret ; ; CHECK-GI-FP16-LABEL: stofp_v2i8_v2f16: @@ -7990,12 +7943,7 @@ define <2 x half> @stofp_v2i8_v2f16(<2 x i8> %a) { ; CHECK-GI-FP16-NEXT: xtn v0.4h, v1.4s ; CHECK-GI-FP16-NEXT: shl v0.4h, v0.4h, #8 ; CHECK-GI-FP16-NEXT: sshr v0.4h, v0.4h, #8 -; CHECK-GI-FP16-NEXT: mov h1, v0.h[1] -; CHECK-GI-FP16-NEXT: mov v0.h[1], v1.h[0] ; CHECK-GI-FP16-NEXT: scvtf v0.4h, v0.4h -; CHECK-GI-FP16-NEXT: mov h1, v0.h[1] -; CHECK-GI-FP16-NEXT: mov v0.h[1], v1.h[0] -; CHECK-GI-FP16-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-GI-FP16-NEXT: ret entry: %c = sitofp <2 x i8> %a to <2 x half> @@ -8039,27 +7987,14 @@ define <2 x half> @utofp_v2i8_v2f16(<2 x i8> %a) { ; CHECK-GI-NOFP16-NEXT: mov v1.s[0], v0.s[0] ; CHECK-GI-NOFP16-NEXT: mov v1.s[1], v0.s[1] ; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v1.4s -; CHECK-GI-NOFP16-NEXT: mov h1, v0.h[1] -; CHECK-GI-NOFP16-NEXT: mov v0.h[1], v1.h[0] -; CHECK-GI-NOFP16-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-GI-NOFP16-NEXT: ret ; ; CHECK-GI-FP16-LABEL: utofp_v2i8_v2f16: ; CHECK-GI-FP16: // %bb.0: // %entry -; CHECK-GI-FP16-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-FP16-NEXT: mov v1.s[0], v0.s[0] -; CHECK-GI-FP16-NEXT: mov v1.s[1], v0.s[1] -; CHECK-GI-FP16-NEXT: xtn v0.4h, v1.4s -; CHECK-GI-FP16-NEXT: mov h1, v0.h[1] -; CHECK-GI-FP16-NEXT: mov v0.h[1], v1.h[0] ; CHECK-GI-FP16-NEXT: movi d1, #0x0000ff000000ff -; CHECK-GI-FP16-NEXT: ushll v0.4s, v0.4h, #0 ; CHECK-GI-FP16-NEXT: and v0.8b, v0.8b, v1.8b ; CHECK-GI-FP16-NEXT: uzp1 v0.4h, v0.4h, v0.4h ; CHECK-GI-FP16-NEXT: ucvtf v0.4h, v0.4h -; CHECK-GI-FP16-NEXT: mov h1, v0.h[1] -; CHECK-GI-FP16-NEXT: mov v0.h[1], v1.h[0] -; CHECK-GI-FP16-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-GI-FP16-NEXT: ret entry: %c = uitofp <2 x i8> %a to <2 x half> @@ 
-8096,11 +8031,8 @@ define <3 x half> @stofp_v3i8_v3f16(<3 x i8> %a) { ; CHECK-GI-NOFP16-NEXT: mov v0.h[2], w2 ; CHECK-GI-NOFP16-NEXT: shl v0.4h, v0.4h, #8 ; CHECK-GI-NOFP16-NEXT: sshr v0.4h, v0.4h, #8 -; CHECK-GI-NOFP16-NEXT: mov s1, v0.s[1] -; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[1] -; CHECK-GI-NOFP16-NEXT: mov h3, v1.h[1] -; CHECK-GI-NOFP16-NEXT: mov v0.h[1], v2.h[0] -; CHECK-GI-NOFP16-NEXT: mov v1.h[1], v3.h[0] +; CHECK-GI-NOFP16-NEXT: mov v1.h[0], v0.h[2] +; CHECK-GI-NOFP16-NEXT: mov v1.h[1], v0.h[3] ; CHECK-GI-NOFP16-NEXT: sshll v0.4s, v0.4h, #0 ; CHECK-GI-NOFP16-NEXT: sshll v1.4s, v1.4h, #0 ; CHECK-GI-NOFP16-NEXT: mov v0.d[1], v1.d[0] @@ -8149,11 +8081,8 @@ define <3 x half> @utofp_v3i8_v3f16(<3 x i8> %a) { ; CHECK-GI-NOFP16-NEXT: mov v0.h[1], w1 ; CHECK-GI-NOFP16-NEXT: mov v0.h[2], w2 ; CHECK-GI-NOFP16-NEXT: and v0.8b, v0.8b, v1.8b -; CHECK-GI-NOFP16-NEXT: mov s1, v0.s[1] -; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[1] -; CHECK-GI-NOFP16-NEXT: mov h3, v1.h[1] -; CHECK-GI-NOFP16-NEXT: mov v0.h[1], v2.h[0] -; CHECK-GI-NOFP16-NEXT: mov v1.h[1], v3.h[0] +; CHECK-GI-NOFP16-NEXT: mov v1.h[0], v0.h[2] +; CHECK-GI-NOFP16-NEXT: mov v1.h[1], v0.h[3] ; CHECK-GI-NOFP16-NEXT: ushll v0.4s, v0.4h, #0 ; CHECK-GI-NOFP16-NEXT: ushll v1.4s, v1.4h, #0 ; CHECK-GI-NOFP16-NEXT: mov v0.d[1], v1.d[0] diff --git a/llvm/test/CodeGen/AArch64/neon-perm.ll b/llvm/test/CodeGen/AArch64/neon-perm.ll index 2897741780f60..7b85924ce1e32 100644 --- a/llvm/test/CodeGen/AArch64/neon-perm.ll +++ b/llvm/test/CodeGen/AArch64/neon-perm.ll @@ -1739,15 +1739,7 @@ define <4 x i8> @test_vzip1_v4i8(<8 x i8> %p) { ; ; CHECK-GI-LABEL: test_vzip1_v4i8: ; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-NEXT: mov b1, v0.b[1] -; CHECK-GI-NEXT: mov v2.b[0], v0.b[0] -; CHECK-GI-NEXT: mov b3, v0.b[2] -; CHECK-GI-NEXT: mov b0, v0.b[3] -; CHECK-GI-NEXT: mov v2.b[1], v1.b[0] -; CHECK-GI-NEXT: mov v2.b[2], v3.b[0] -; CHECK-GI-NEXT: mov v2.b[3], v0.b[0] -; CHECK-GI-NEXT: ushll v0.8h, v2.8b, #0 +; CHECK-GI-NEXT: ushll v0.8h, v0.8b, #0 ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-GI-NEXT: ret %lo = shufflevector <8 x i8> %p, <8 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> diff --git a/llvm/test/CodeGen/AArch64/sadd_sat_vec.ll b/llvm/test/CodeGen/AArch64/sadd_sat_vec.ll index adac75758220e..6d331d9413f91 100644 --- a/llvm/test/CodeGen/AArch64/sadd_sat_vec.ll +++ b/llvm/test/CodeGen/AArch64/sadd_sat_vec.ll @@ -2,11 +2,6 @@ ; RUN: llc < %s -mtriple=aarch64-- | FileCheck %s --check-prefixes=CHECK,CHECK-SD ; RUN: llc < %s -mtriple=aarch64-- -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI -; CHECK-GI: warning: Instruction selection used fallback path for v2i8 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v16i4 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v16i1 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v2i128 - declare <1 x i8> @llvm.sadd.sat.v1i8(<1 x i8>, <1 x i8>) declare <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8>, <2 x i8>) declare <4 x i8> @llvm.sadd.sat.v4i8(<4 x i8>, <4 x i8>) @@ -190,23 +185,39 @@ define void @v4i8(ptr %px, ptr %py, ptr %pz) nounwind { } define void @v2i8(ptr %px, ptr %py, ptr %pz) nounwind { -; CHECK-LABEL: v2i8: -; CHECK: // %bb.0: -; CHECK-NEXT: ld1 { v0.b }[0], [x0] -; CHECK-NEXT: ld1 { v1.b }[0], [x1] -; CHECK-NEXT: add x8, x0, #1 -; CHECK-NEXT: add x9, x1, #1 -; CHECK-NEXT: ld1 { v0.b }[4], [x8] -; CHECK-NEXT: ld1 { v1.b }[4], [x9] -; CHECK-NEXT: shl v1.2s, v1.2s, #24 -; 
CHECK-NEXT: shl v0.2s, v0.2s, #24 -; CHECK-NEXT: sqadd v0.2s, v0.2s, v1.2s -; CHECK-NEXT: ushr v0.2s, v0.2s, #24 -; CHECK-NEXT: mov w8, v0.s[1] -; CHECK-NEXT: fmov w9, s0 -; CHECK-NEXT: strb w9, [x2] -; CHECK-NEXT: strb w8, [x2, #1] -; CHECK-NEXT: ret +; CHECK-SD-LABEL: v2i8: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ld1 { v0.b }[0], [x0] +; CHECK-SD-NEXT: ld1 { v1.b }[0], [x1] +; CHECK-SD-NEXT: add x8, x0, #1 +; CHECK-SD-NEXT: add x9, x1, #1 +; CHECK-SD-NEXT: ld1 { v0.b }[4], [x8] +; CHECK-SD-NEXT: ld1 { v1.b }[4], [x9] +; CHECK-SD-NEXT: shl v1.2s, v1.2s, #24 +; CHECK-SD-NEXT: shl v0.2s, v0.2s, #24 +; CHECK-SD-NEXT: sqadd v0.2s, v0.2s, v1.2s +; CHECK-SD-NEXT: ushr v0.2s, v0.2s, #24 +; CHECK-SD-NEXT: mov w8, v0.s[1] +; CHECK-SD-NEXT: fmov w9, s0 +; CHECK-SD-NEXT: strb w9, [x2] +; CHECK-SD-NEXT: strb w8, [x2, #1] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: v2i8: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: ldr b0, [x0] +; CHECK-GI-NEXT: ldr b1, [x1] +; CHECK-GI-NEXT: add x8, x2, #1 +; CHECK-GI-NEXT: ldr b2, [x0, #1] +; CHECK-GI-NEXT: ldr b3, [x1, #1] +; CHECK-GI-NEXT: mov v0.b[0], v0.b[0] +; CHECK-GI-NEXT: mov v1.b[0], v1.b[0] +; CHECK-GI-NEXT: mov v0.b[1], v2.b[0] +; CHECK-GI-NEXT: mov v1.b[1], v3.b[0] +; CHECK-GI-NEXT: sqadd v0.8b, v0.8b, v1.8b +; CHECK-GI-NEXT: st1 { v0.b }[0], [x2] +; CHECK-GI-NEXT: st1 { v0.b }[1], [x8] +; CHECK-GI-NEXT: ret %x = load <2 x i8>, ptr %px %y = load <2 x i8>, ptr %py %z = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> %x, <2 x i8> %y) @@ -256,10 +267,10 @@ define void @v2i16(ptr %px, ptr %py, ptr %pz) nounwind { ; CHECK-GI-NEXT: add x9, x1, #2 ; CHECK-GI-NEXT: ld1 { v0.h }[1], [x8] ; CHECK-GI-NEXT: ld1 { v1.h }[1], [x9] +; CHECK-GI-NEXT: add x8, x2, #2 ; CHECK-GI-NEXT: sqadd v0.4h, v0.4h, v1.4h -; CHECK-GI-NEXT: mov h1, v0.h[1] ; CHECK-GI-NEXT: str h0, [x2] -; CHECK-GI-NEXT: str h1, [x2, #2] +; CHECK-GI-NEXT: st1 { v0.h }[1], [x8] ; CHECK-GI-NEXT: ret %x = load <2 x i16>, ptr %px %y = load <2 x i16>, ptr %py diff --git a/llvm/test/CodeGen/AArch64/shift.ll b/llvm/test/CodeGen/AArch64/shift.ll index 54f7887aee8d3..066928687cc02 100644 --- a/llvm/test/CodeGen/AArch64/shift.ll +++ b/llvm/test/CodeGen/AArch64/shift.ll @@ -534,14 +534,7 @@ define <4 x i8> @shl_v4i8(<4 x i8> %0, <4 x i8> %1){ ; CHECK-GI-NEXT: uzp1 v0.8b, v0.8b, v0.8b ; CHECK-GI-NEXT: uzp1 v1.8b, v1.8b, v0.8b ; CHECK-GI-NEXT: ushl v0.8b, v0.8b, v1.8b -; CHECK-GI-NEXT: mov b1, v0.b[1] -; CHECK-GI-NEXT: mov v2.b[0], v0.b[0] -; CHECK-GI-NEXT: mov b3, v0.b[2] -; CHECK-GI-NEXT: mov b0, v0.b[3] -; CHECK-GI-NEXT: mov v2.b[1], v1.b[0] -; CHECK-GI-NEXT: mov v2.b[2], v3.b[0] -; CHECK-GI-NEXT: mov v2.b[3], v0.b[0] -; CHECK-GI-NEXT: ushll v0.8h, v2.8b, #0 +; CHECK-GI-NEXT: ushll v0.8h, v0.8b, #0 ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-GI-NEXT: ret %3 = shl <4 x i8> %0, %1 @@ -577,8 +570,6 @@ define <2 x i16> @shl_v2i16(<2 x i16> %0, <2 x i16> %1){ ; CHECK-GI-NEXT: uzp1 v0.4h, v0.4h, v0.4h ; CHECK-GI-NEXT: uzp1 v1.4h, v1.4h, v0.4h ; CHECK-GI-NEXT: ushl v0.4h, v0.4h, v1.4h -; CHECK-GI-NEXT: mov h1, v0.h[1] -; CHECK-GI-NEXT: mov v0.h[1], v1.h[0] ; CHECK-GI-NEXT: ushll v0.4s, v0.4h, #0 ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-GI-NEXT: ret @@ -723,14 +714,7 @@ define <4 x i8> @ashr_v4i8(<4 x i8> %0, <4 x i8> %1){ ; CHECK-GI-NEXT: uzp1 v0.8b, v0.8b, v0.8b ; CHECK-GI-NEXT: neg v1.8b, v1.8b ; CHECK-GI-NEXT: sshl v0.8b, v0.8b, v1.8b -; CHECK-GI-NEXT: mov b1, v0.b[1] -; CHECK-GI-NEXT: mov v2.b[0], v0.b[0] -; CHECK-GI-NEXT: mov b3, v0.b[2] -; CHECK-GI-NEXT: mov b0, v0.b[3] -; 
CHECK-GI-NEXT: mov v2.b[1], v1.b[0] -; CHECK-GI-NEXT: mov v2.b[2], v3.b[0] -; CHECK-GI-NEXT: mov v2.b[3], v0.b[0] -; CHECK-GI-NEXT: ushll v0.8h, v2.8b, #0 +; CHECK-GI-NEXT: ushll v0.8h, v0.8b, #0 ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-GI-NEXT: ret %3 = ashr <4 x i8> %0, %1 @@ -766,8 +750,6 @@ define <2 x i16> @ashr_v2i16(<2 x i16> %0, <2 x i16> %1){ ; CHECK-GI-NEXT: uzp1 v0.4h, v0.4h, v0.4h ; CHECK-GI-NEXT: neg v1.4h, v1.4h ; CHECK-GI-NEXT: sshl v0.4h, v0.4h, v1.4h -; CHECK-GI-NEXT: mov h1, v0.h[1] -; CHECK-GI-NEXT: mov v0.h[1], v1.h[0] ; CHECK-GI-NEXT: ushll v0.4s, v0.4h, #0 ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-GI-NEXT: ret @@ -906,14 +888,7 @@ define <4 x i8> @lshr_v4i8(<4 x i8> %0, <4 x i8> %1){ ; CHECK-GI-NEXT: uzp1 v0.8b, v0.8b, v0.8b ; CHECK-GI-NEXT: neg v1.8b, v1.8b ; CHECK-GI-NEXT: ushl v0.8b, v0.8b, v1.8b -; CHECK-GI-NEXT: mov b1, v0.b[1] -; CHECK-GI-NEXT: mov v2.b[0], v0.b[0] -; CHECK-GI-NEXT: mov b3, v0.b[2] -; CHECK-GI-NEXT: mov b0, v0.b[3] -; CHECK-GI-NEXT: mov v2.b[1], v1.b[0] -; CHECK-GI-NEXT: mov v2.b[2], v3.b[0] -; CHECK-GI-NEXT: mov v2.b[3], v0.b[0] -; CHECK-GI-NEXT: ushll v0.8h, v2.8b, #0 +; CHECK-GI-NEXT: ushll v0.8h, v0.8b, #0 ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-GI-NEXT: ret %3 = lshr <4 x i8> %0, %1 @@ -948,8 +923,6 @@ define <2 x i16> @lshr_v2i16(<2 x i16> %0, <2 x i16> %1){ ; CHECK-GI-NEXT: uzp1 v0.4h, v0.4h, v0.4h ; CHECK-GI-NEXT: neg v1.4h, v1.4h ; CHECK-GI-NEXT: ushl v0.4h, v0.4h, v1.4h -; CHECK-GI-NEXT: mov h1, v0.h[1] -; CHECK-GI-NEXT: mov v0.h[1], v1.h[0] ; CHECK-GI-NEXT: ushll v0.4s, v0.4h, #0 ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-GI-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/shufflevector.ll b/llvm/test/CodeGen/AArch64/shufflevector.ll index 5f4ff1e64673b..6b5951551c3a5 100644 --- a/llvm/test/CodeGen/AArch64/shufflevector.ll +++ b/llvm/test/CodeGen/AArch64/shufflevector.ll @@ -2,9 +2,6 @@ ; RUN: llc -mtriple=aarch64-none-linux-gnu %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD ; RUN: llc -mtriple=aarch64-none-linux-gnu -global-isel -global-isel-abort=2 %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI -; CHECK-GI: warning: Instruction selection used fallback path for shufflevector_v2i1 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for shufflevector_v2i1_zeroes - ; ===== Legal Vector Types ===== define <8 x i8> @shufflevector_v8i8(<8 x i8> %a, <8 x i8> %b) { @@ -183,13 +180,30 @@ define <2 x i64> @shufflevector_v2i64_zeroes(<2 x i64> %a, <2 x i64> %b) { ; ===== Smaller/Larger Width Vectors with Legal Element Sizes ===== define <2 x i1> @shufflevector_v2i1(<2 x i1> %a, <2 x i1> %b){ -; CHECK-LABEL: shufflevector_v2i1: -; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 -; CHECK-NEXT: mov v0.s[1], v1.s[1] -; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: shufflevector_v2i1: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1 +; CHECK-SD-NEXT: mov v0.s[1], v1.s[1] +; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: shufflevector_v2i1: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1 +; CHECK-GI-NEXT: mov w8, v1.s[1] +; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-GI-NEXT: mov w9, v0.s[1] +; CHECK-GI-NEXT: mov v1.b[1], w8 +; 
CHECK-GI-NEXT: mov v0.b[1], w9 +; CHECK-GI-NEXT: mov b1, v1.b[1] +; CHECK-GI-NEXT: mov v0.b[1], v1.b[0] +; CHECK-GI-NEXT: umov w8, v0.b[0] +; CHECK-GI-NEXT: umov w9, v0.b[1] +; CHECK-GI-NEXT: mov v0.s[0], w8 +; CHECK-GI-NEXT: mov v0.s[1], w9 +; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-GI-NEXT: ret %c = shufflevector <2 x i1> %a, <2 x i1> %b, <2 x i32> <i32 0, i32 3> ret <2 x i1> %c } @@ -358,11 +372,24 @@ define <4 x i64> @shufflevector_v4i64(<4 x i64> %a, <4 x i64> %b) { ; ===== Smaller/Larger Width Vectors with Zero Masks ===== define <2 x i1> @shufflevector_v2i1_zeroes(<2 x i1> %a, <2 x i1> %b){ -; CHECK-LABEL: shufflevector_v2i1_zeroes: -; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: dup v0.2s, v0.s[0] -; CHECK-NEXT: ret +; CHECK-SD-LABEL: shufflevector_v2i1_zeroes: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-SD-NEXT: dup v0.2s, v0.s[0] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: shufflevector_v2i1_zeroes: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-GI-NEXT: mov w8, v0.s[1] +; CHECK-GI-NEXT: mov v0.b[1], w8 +; CHECK-GI-NEXT: dup v0.8b, v0.b[0] +; CHECK-GI-NEXT: umov w8, v0.b[0] +; CHECK-GI-NEXT: umov w9, v0.b[1] +; CHECK-GI-NEXT: mov v0.s[0], w8 +; CHECK-GI-NEXT: mov v0.s[1], w9 +; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-GI-NEXT: ret %c = shufflevector <2 x i1> %a, <2 x i1> %b, <2 x i32> <i32 0, i32 0> ret <2 x i1> %c } @@ -486,11 +513,9 @@ define <3 x i8> @shufflevector_v3i8(<3 x i8> %a, <3 x i8> %b) { ; CHECK-GI-NEXT: mov v0.d[1], v1.d[0] ; CHECK-GI-NEXT: ldr d1, [x8, :lo12:.LCPI30_0] ; CHECK-GI-NEXT: tbl v0.16b, { v0.16b }, v1.16b -; CHECK-GI-NEXT: mov b1, v0.b[1] -; CHECK-GI-NEXT: mov b2, v0.b[2] -; CHECK-GI-NEXT: fmov w0, s0 -; CHECK-GI-NEXT: fmov w1, s1 -; CHECK-GI-NEXT: fmov w2, s2 +; CHECK-GI-NEXT: umov w0, v0.b[0] +; CHECK-GI-NEXT: umov w1, v0.b[1] +; CHECK-GI-NEXT: umov w2, v0.b[2] ; CHECK-GI-NEXT: ret %c = shufflevector <3 x i8> %a, <3 x i8> %b, <3 x i32> ret <3 x i8> %c @@ -598,11 +623,9 @@ define <3 x i8> @shufflevector_v3i8_zeroes(<3 x i8> %a, <3 x i8> %b) { ; CHECK-GI-NEXT: mov v0.b[1], w1 ; CHECK-GI-NEXT: mov v0.b[2], w2 ; CHECK-GI-NEXT: dup v0.8b, v0.b[0] -; CHECK-GI-NEXT: mov b1, v0.b[1] -; CHECK-GI-NEXT: mov b2, v0.b[2] -; CHECK-GI-NEXT: fmov w0, s0 -; CHECK-GI-NEXT: fmov w1, s1 -; CHECK-GI-NEXT: fmov w2, s2 +; CHECK-GI-NEXT: umov w0, v0.b[0] +; CHECK-GI-NEXT: umov w1, v0.b[1] +; CHECK-GI-NEXT: umov w2, v0.b[2] ; CHECK-GI-NEXT: ret %c = shufflevector <3 x i8> %a, <3 x i8> %b, <3 x i32> <i32 0, i32 0, i32 0> ret <3 x i8> %c diff --git a/llvm/test/CodeGen/AArch64/ssub_sat_vec.ll b/llvm/test/CodeGen/AArch64/ssub_sat_vec.ll index 12371ef2c0021..dddda7e9ba64c 100644 --- a/llvm/test/CodeGen/AArch64/ssub_sat_vec.ll +++ b/llvm/test/CodeGen/AArch64/ssub_sat_vec.ll @@ -2,11 +2,6 @@ ; RUN: llc < %s -mtriple=aarch64-- | FileCheck %s --check-prefixes=CHECK,CHECK-SD ; RUN: llc < %s -mtriple=aarch64-- -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI -; CHECK-GI: warning: Instruction selection used fallback path for v2i8 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v16i4 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v16i1 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v2i128 - declare <1 x i8> @llvm.ssub.sat.v1i8(<1 x i8>, <1 x i8>) declare <2 x i8> @llvm.ssub.sat.v2i8(<2 x i8>, <2 x i8>) declare <4 x i8> @llvm.ssub.sat.v4i8(<4 x i8>, <4 x i8>) @@ -191,23 
+186,39 @@ define void @v4i8(ptr %px, ptr %py, ptr %pz) nounwind { } define void @v2i8(ptr %px, ptr %py, ptr %pz) nounwind { -; CHECK-LABEL: v2i8: -; CHECK: // %bb.0: -; CHECK-NEXT: ld1 { v0.b }[0], [x0] -; CHECK-NEXT: ld1 { v1.b }[0], [x1] -; CHECK-NEXT: add x8, x0, #1 -; CHECK-NEXT: add x9, x1, #1 -; CHECK-NEXT: ld1 { v0.b }[4], [x8] -; CHECK-NEXT: ld1 { v1.b }[4], [x9] -; CHECK-NEXT: shl v1.2s, v1.2s, #24 -; CHECK-NEXT: shl v0.2s, v0.2s, #24 -; CHECK-NEXT: sqsub v0.2s, v0.2s, v1.2s -; CHECK-NEXT: ushr v0.2s, v0.2s, #24 -; CHECK-NEXT: mov w8, v0.s[1] -; CHECK-NEXT: fmov w9, s0 -; CHECK-NEXT: strb w9, [x2] -; CHECK-NEXT: strb w8, [x2, #1] -; CHECK-NEXT: ret +; CHECK-SD-LABEL: v2i8: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ld1 { v0.b }[0], [x0] +; CHECK-SD-NEXT: ld1 { v1.b }[0], [x1] +; CHECK-SD-NEXT: add x8, x0, #1 +; CHECK-SD-NEXT: add x9, x1, #1 +; CHECK-SD-NEXT: ld1 { v0.b }[4], [x8] +; CHECK-SD-NEXT: ld1 { v1.b }[4], [x9] +; CHECK-SD-NEXT: shl v1.2s, v1.2s, #24 +; CHECK-SD-NEXT: shl v0.2s, v0.2s, #24 +; CHECK-SD-NEXT: sqsub v0.2s, v0.2s, v1.2s +; CHECK-SD-NEXT: ushr v0.2s, v0.2s, #24 +; CHECK-SD-NEXT: mov w8, v0.s[1] +; CHECK-SD-NEXT: fmov w9, s0 +; CHECK-SD-NEXT: strb w9, [x2] +; CHECK-SD-NEXT: strb w8, [x2, #1] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: v2i8: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: ldr b0, [x0] +; CHECK-GI-NEXT: ldr b1, [x1] +; CHECK-GI-NEXT: add x8, x2, #1 +; CHECK-GI-NEXT: ldr b2, [x0, #1] +; CHECK-GI-NEXT: ldr b3, [x1, #1] +; CHECK-GI-NEXT: mov v0.b[0], v0.b[0] +; CHECK-GI-NEXT: mov v1.b[0], v1.b[0] +; CHECK-GI-NEXT: mov v0.b[1], v2.b[0] +; CHECK-GI-NEXT: mov v1.b[1], v3.b[0] +; CHECK-GI-NEXT: sqsub v0.8b, v0.8b, v1.8b +; CHECK-GI-NEXT: st1 { v0.b }[0], [x2] +; CHECK-GI-NEXT: st1 { v0.b }[1], [x8] +; CHECK-GI-NEXT: ret %x = load <2 x i8>, ptr %px %y = load <2 x i8>, ptr %py %z = call <2 x i8> @llvm.ssub.sat.v2i8(<2 x i8> %x, <2 x i8> %y) @@ -257,10 +268,10 @@ define void @v2i16(ptr %px, ptr %py, ptr %pz) nounwind { ; CHECK-GI-NEXT: add x9, x1, #2 ; CHECK-GI-NEXT: ld1 { v0.h }[1], [x8] ; CHECK-GI-NEXT: ld1 { v1.h }[1], [x9] +; CHECK-GI-NEXT: add x8, x2, #2 ; CHECK-GI-NEXT: sqsub v0.4h, v0.4h, v1.4h -; CHECK-GI-NEXT: mov h1, v0.h[1] ; CHECK-GI-NEXT: str h0, [x2] -; CHECK-GI-NEXT: str h1, [x2, #2] +; CHECK-GI-NEXT: st1 { v0.h }[1], [x8] ; CHECK-GI-NEXT: ret %x = load <2 x i16>, ptr %px %y = load <2 x i16>, ptr %py diff --git a/llvm/test/CodeGen/AArch64/uadd_sat_vec.ll b/llvm/test/CodeGen/AArch64/uadd_sat_vec.ll index e99935e8677fc..badd31c1c561c 100644 --- a/llvm/test/CodeGen/AArch64/uadd_sat_vec.ll +++ b/llvm/test/CodeGen/AArch64/uadd_sat_vec.ll @@ -2,11 +2,6 @@ ; RUN: llc < %s -mtriple=aarch64-- | FileCheck %s --check-prefixes=CHECK,CHECK-SD ; RUN: llc < %s -mtriple=aarch64-- -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI -; CHECK-GI: warning: Instruction selection used fallback path for v2i8 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v16i4 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v16i1 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v2i128 - declare <1 x i8> @llvm.uadd.sat.v1i8(<1 x i8>, <1 x i8>) declare <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8>, <2 x i8>) declare <4 x i8> @llvm.uadd.sat.v4i8(<4 x i8>, <4 x i8>) @@ -187,24 +182,40 @@ define void @v4i8(ptr %px, ptr %py, ptr %pz) nounwind { } define void @v2i8(ptr %px, ptr %py, ptr %pz) nounwind { -; CHECK-LABEL: v2i8: -; CHECK: // %bb.0: -; CHECK-NEXT: ldrb w8, [x0] -; CHECK-NEXT: ldrb w9, [x1] 
-; CHECK-NEXT: movi d2, #0x0000ff000000ff -; CHECK-NEXT: ldrb w10, [x0, #1] -; CHECK-NEXT: ldrb w11, [x1, #1] -; CHECK-NEXT: fmov s0, w8 -; CHECK-NEXT: fmov s1, w9 -; CHECK-NEXT: mov v0.s[1], w10 -; CHECK-NEXT: mov v1.s[1], w11 -; CHECK-NEXT: add v0.2s, v0.2s, v1.2s -; CHECK-NEXT: umin v0.2s, v0.2s, v2.2s -; CHECK-NEXT: mov w8, v0.s[1] -; CHECK-NEXT: fmov w9, s0 -; CHECK-NEXT: strb w9, [x2] -; CHECK-NEXT: strb w8, [x2, #1] -; CHECK-NEXT: ret +; CHECK-SD-LABEL: v2i8: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldrb w8, [x0] +; CHECK-SD-NEXT: ldrb w9, [x1] +; CHECK-SD-NEXT: movi d2, #0x0000ff000000ff +; CHECK-SD-NEXT: ldrb w10, [x0, #1] +; CHECK-SD-NEXT: ldrb w11, [x1, #1] +; CHECK-SD-NEXT: fmov s0, w8 +; CHECK-SD-NEXT: fmov s1, w9 +; CHECK-SD-NEXT: mov v0.s[1], w10 +; CHECK-SD-NEXT: mov v1.s[1], w11 +; CHECK-SD-NEXT: add v0.2s, v0.2s, v1.2s +; CHECK-SD-NEXT: umin v0.2s, v0.2s, v2.2s +; CHECK-SD-NEXT: mov w8, v0.s[1] +; CHECK-SD-NEXT: fmov w9, s0 +; CHECK-SD-NEXT: strb w9, [x2] +; CHECK-SD-NEXT: strb w8, [x2, #1] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: v2i8: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: ldr b0, [x0] +; CHECK-GI-NEXT: ldr b1, [x1] +; CHECK-GI-NEXT: add x8, x2, #1 +; CHECK-GI-NEXT: ldr b2, [x0, #1] +; CHECK-GI-NEXT: ldr b3, [x1, #1] +; CHECK-GI-NEXT: mov v0.b[0], v0.b[0] +; CHECK-GI-NEXT: mov v1.b[0], v1.b[0] +; CHECK-GI-NEXT: mov v0.b[1], v2.b[0] +; CHECK-GI-NEXT: mov v1.b[1], v3.b[0] +; CHECK-GI-NEXT: uqadd v0.8b, v0.8b, v1.8b +; CHECK-GI-NEXT: st1 { v0.b }[0], [x2] +; CHECK-GI-NEXT: st1 { v0.b }[1], [x8] +; CHECK-GI-NEXT: ret %x = load <2 x i8>, ptr %px %y = load <2 x i8>, ptr %py %z = call <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8> %x, <2 x i8> %y) @@ -255,10 +266,10 @@ define void @v2i16(ptr %px, ptr %py, ptr %pz) nounwind { ; CHECK-GI-NEXT: add x9, x1, #2 ; CHECK-GI-NEXT: ld1 { v0.h }[1], [x8] ; CHECK-GI-NEXT: ld1 { v1.h }[1], [x9] +; CHECK-GI-NEXT: add x8, x2, #2 ; CHECK-GI-NEXT: uqadd v0.4h, v0.4h, v1.4h -; CHECK-GI-NEXT: mov h1, v0.h[1] ; CHECK-GI-NEXT: str h0, [x2] -; CHECK-GI-NEXT: str h1, [x2, #2] +; CHECK-GI-NEXT: st1 { v0.h }[1], [x8] ; CHECK-GI-NEXT: ret %x = load <2 x i16>, ptr %px %y = load <2 x i16>, ptr %py diff --git a/llvm/test/CodeGen/AArch64/usub_sat_vec.ll b/llvm/test/CodeGen/AArch64/usub_sat_vec.ll index cdba9625431a5..45418b5c648fa 100644 --- a/llvm/test/CodeGen/AArch64/usub_sat_vec.ll +++ b/llvm/test/CodeGen/AArch64/usub_sat_vec.ll @@ -2,11 +2,6 @@ ; RUN: llc < %s -mtriple=aarch64-- | FileCheck %s --check-prefixes=CHECK,CHECK-SD ; RUN: llc < %s -mtriple=aarch64-- -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI -; CHECK-GI: warning: Instruction selection used fallback path for v2i8 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v16i4 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v16i1 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v2i128 - declare <1 x i8> @llvm.usub.sat.v1i8(<1 x i8>, <1 x i8>) declare <2 x i8> @llvm.usub.sat.v2i8(<2 x i8>, <2 x i8>) declare <4 x i8> @llvm.usub.sat.v4i8(<4 x i8>, <4 x i8>) @@ -188,22 +183,38 @@ define void @v4i8(ptr %px, ptr %py, ptr %pz) nounwind { } define void @v2i8(ptr %px, ptr %py, ptr %pz) nounwind { -; CHECK-LABEL: v2i8: -; CHECK: // %bb.0: -; CHECK-NEXT: ldrb w8, [x0] -; CHECK-NEXT: ldrb w9, [x1] -; CHECK-NEXT: ldrb w10, [x0, #1] -; CHECK-NEXT: ldrb w11, [x1, #1] -; CHECK-NEXT: fmov s0, w8 -; CHECK-NEXT: fmov s1, w9 -; CHECK-NEXT: mov v0.s[1], w10 -; CHECK-NEXT: mov v1.s[1], w11 -; CHECK-NEXT: uqsub v0.2s, 
v0.2s, v1.2s -; CHECK-NEXT: mov w8, v0.s[1] -; CHECK-NEXT: fmov w9, s0 -; CHECK-NEXT: strb w9, [x2] -; CHECK-NEXT: strb w8, [x2, #1] -; CHECK-NEXT: ret +; CHECK-SD-LABEL: v2i8: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldrb w8, [x0] +; CHECK-SD-NEXT: ldrb w9, [x1] +; CHECK-SD-NEXT: ldrb w10, [x0, #1] +; CHECK-SD-NEXT: ldrb w11, [x1, #1] +; CHECK-SD-NEXT: fmov s0, w8 +; CHECK-SD-NEXT: fmov s1, w9 +; CHECK-SD-NEXT: mov v0.s[1], w10 +; CHECK-SD-NEXT: mov v1.s[1], w11 +; CHECK-SD-NEXT: uqsub v0.2s, v0.2s, v1.2s +; CHECK-SD-NEXT: mov w8, v0.s[1] +; CHECK-SD-NEXT: fmov w9, s0 +; CHECK-SD-NEXT: strb w9, [x2] +; CHECK-SD-NEXT: strb w8, [x2, #1] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: v2i8: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: ldr b0, [x0] +; CHECK-GI-NEXT: ldr b1, [x1] +; CHECK-GI-NEXT: add x8, x2, #1 +; CHECK-GI-NEXT: ldr b2, [x0, #1] +; CHECK-GI-NEXT: ldr b3, [x1, #1] +; CHECK-GI-NEXT: mov v0.b[0], v0.b[0] +; CHECK-GI-NEXT: mov v1.b[0], v1.b[0] +; CHECK-GI-NEXT: mov v0.b[1], v2.b[0] +; CHECK-GI-NEXT: mov v1.b[1], v3.b[0] +; CHECK-GI-NEXT: uqsub v0.8b, v0.8b, v1.8b +; CHECK-GI-NEXT: st1 { v0.b }[0], [x2] +; CHECK-GI-NEXT: st1 { v0.b }[1], [x8] +; CHECK-GI-NEXT: ret %x = load <2 x i8>, ptr %px %y = load <2 x i8>, ptr %py %z = call <2 x i8> @llvm.usub.sat.v2i8(<2 x i8> %x, <2 x i8> %y) @@ -252,10 +263,10 @@ define void @v2i16(ptr %px, ptr %py, ptr %pz) nounwind { ; CHECK-GI-NEXT: add x9, x1, #2 ; CHECK-GI-NEXT: ld1 { v0.h }[1], [x8] ; CHECK-GI-NEXT: ld1 { v1.h }[1], [x9] +; CHECK-GI-NEXT: add x8, x2, #2 ; CHECK-GI-NEXT: uqsub v0.4h, v0.4h, v1.4h -; CHECK-GI-NEXT: mov h1, v0.h[1] ; CHECK-GI-NEXT: str h0, [x2] -; CHECK-GI-NEXT: str h1, [x2, #2] +; CHECK-GI-NEXT: st1 { v0.h }[1], [x8] ; CHECK-GI-NEXT: ret %x = load <2 x i16>, ptr %px %y = load <2 x i16>, ptr %py diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-unmerge-values.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-unmerge-values.mir index 3b456ed248b3a..8300b2bc05e96 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-unmerge-values.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-unmerge-values.mir @@ -1151,14 +1151,13 @@ body: | ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<8 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[COPY]](<8 x s32>) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<2 x s32>), [[UV3:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[UV]](<4 x s32>) - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV2]](<2 x s32>) - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV3]](<2 x s32>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s32>), [[UV1:%[0-9]+]]:_(<2 x s32>), [[UV2:%[0-9]+]]:_(<2 x s32>), [[UV3:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[COPY]](<8 x s32>) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV]](<2 x s32>) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV1]](<2 x s32>) ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[TRUNC]](<2 x s16>), [[TRUNC1]](<2 x s16>) - ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(<2 x s32>), [[UV5:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[UV1]](<4 x s32>) - ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV4]](<2 x s32>) - ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV5]](<2 x s32>) + ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(<2 x s32>), [[UV5:%[0-9]+]]:_(<2 x s32>), [[UV6:%[0-9]+]]:_(<2 x 
s32>), [[UV7:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[COPY]](<8 x s32>) + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV6]](<2 x s32>) + ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV7]](<2 x s32>) ; CHECK-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[TRUNC2]](<2 x s16>), [[TRUNC3]](<2 x s16>) ; CHECK-NEXT: S_ENDPGM 0, implicit [[CONCAT_VECTORS]](<4 x s16>), implicit [[CONCAT_VECTORS1]](<4 x s16>) %0:_(<8 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 @@ -1250,14 +1249,13 @@ body: | ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s64>), [[UV1:%[0-9]+]]:_(<2 x s64>) = G_UNMERGE_VALUES [[COPY]](<4 x s64>) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV]](<2 x s64>) - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[UV2]](s64) - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[UV3]](s64) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<4 x s64>) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[UV]](s64) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[UV1]](s64) ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[TRUNC]](s32), [[TRUNC1]](s32) - ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV1]](<2 x s64>) - ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s32) = G_TRUNC [[UV4]](s64) - ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s32) = G_TRUNC [[UV5]](s64) + ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64), [[UV6:%[0-9]+]]:_(s64), [[UV7:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<4 x s64>) + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s32) = G_TRUNC [[UV6]](s64) + ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s32) = G_TRUNC [[UV7]](s64) ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[TRUNC2]](s32), [[TRUNC3]](s32) ; CHECK-NEXT: S_ENDPGM 0, implicit [[BUILD_VECTOR]](<2 x s32>), implicit [[BUILD_VECTOR1]](<2 x s32>) %0:_(<4 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 @@ -1300,21 +1298,20 @@ body: | ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s64>), [[UV1:%[0-9]+]]:_(<2 x s64>) = G_UNMERGE_VALUES [[COPY]](<4 x s64>) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV]](<2 x s64>) - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[UV2]](s64) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<4 x s64>) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[UV]](s64) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC]], [[C]] - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[UV3]](s64) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[UV1]](s64) ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[TRUNC1]], [[C]] ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CHECK-NEXT: 
[[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV1]](<2 x s64>) - ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s32) = G_TRUNC [[UV4]](s64) + ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64), [[UV6:%[0-9]+]]:_(s64), [[UV7:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<4 x s64>) + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s32) = G_TRUNC [[UV6]](s64) ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[TRUNC2]], [[C]] - ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s32) = G_TRUNC [[UV5]](s64) + ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s32) = G_TRUNC [[UV7]](s64) ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[TRUNC3]], [[C]] ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C1]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store-global.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store-global.mir index 31f28b50462b7..f2a88a21a286e 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store-global.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store-global.mir @@ -7418,9 +7418,8 @@ body: | ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<8 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 - ; SI-NEXT: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[COPY1]](<8 x s32>) - ; SI-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](<4 x s32>) - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) + ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<8 x s32>) + ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 @@ -7440,7 +7439,7 @@ body: | ; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) ; SI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) + ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32) ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) ; SI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C2]](s32) @@ -7456,7 +7455,7 @@ body: | ; SI-NEXT: G_STORE [[LSHR5]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1) ; SI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; SI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[UV4]](s32) + ; SI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[COPY7]], [[C]](s32) ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) ; SI-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C2]](s32) @@ -7472,7 +7471,7 @@ body: | ; SI-NEXT: G_STORE [[LSHR8]](s32), [[PTR_ADD10]](p1) :: (store (s8) into unknown-address + 11, addrspace 1) ; SI-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; SI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64) - ; SI-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY [[UV5]](s32) + ; SI-NEXT: 
[[COPY10:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) ; SI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[COPY10]], [[C]](s32) ; SI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64) ; SI-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY [[C2]](s32) @@ -7488,8 +7487,8 @@ body: | ; SI-NEXT: G_STORE [[LSHR11]](s32), [[PTR_ADD14]](p1) :: (store (s8) into unknown-address + 15, addrspace 1) ; SI-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; SI-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64) - ; SI-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](<4 x s32>) - ; SI-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[UV6]](s32) + ; SI-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<8 x s32>) + ; SI-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[UV12]](s32) ; SI-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[COPY13]], [[C]](s32) ; SI-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s64) ; SI-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[C2]](s32) @@ -7504,7 +7503,7 @@ body: | ; SI-NEXT: G_STORE [[LSHR12]](s32), [[PTR_ADD16]](p1) :: (store (s8) into unknown-address + 18, addrspace 1) ; SI-NEXT: G_STORE [[LSHR14]](s32), [[PTR_ADD18]](p1) :: (store (s8) into unknown-address + 19, addrspace 1) ; SI-NEXT: [[PTR_ADD19:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C5]](s64) - ; SI-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[UV7]](s32) + ; SI-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[UV13]](s32) ; SI-NEXT: [[LSHR15:%[0-9]+]]:_(s32) = G_LSHR [[COPY16]], [[C]](s32) ; SI-NEXT: [[PTR_ADD20:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C1]](s64) ; SI-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[C2]](s32) @@ -7519,7 +7518,7 @@ body: | ; SI-NEXT: G_STORE [[LSHR15]](s32), [[PTR_ADD20]](p1) :: (store (s8) into unknown-address + 22, addrspace 1) ; SI-NEXT: G_STORE [[LSHR17]](s32), [[PTR_ADD22]](p1) :: (store (s8) into unknown-address + 23, addrspace 1) ; SI-NEXT: [[PTR_ADD23:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C6]](s64) - ; SI-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[UV8]](s32) + ; SI-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[UV14]](s32) ; SI-NEXT: [[LSHR18:%[0-9]+]]:_(s32) = G_LSHR [[COPY19]], [[C]](s32) ; SI-NEXT: [[PTR_ADD24:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C1]](s64) ; SI-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY [[C2]](s32) @@ -7534,7 +7533,7 @@ body: | ; SI-NEXT: G_STORE [[LSHR18]](s32), [[PTR_ADD24]](p1) :: (store (s8) into unknown-address + 26, addrspace 1) ; SI-NEXT: G_STORE [[LSHR20]](s32), [[PTR_ADD26]](p1) :: (store (s8) into unknown-address + 27, addrspace 1) ; SI-NEXT: [[PTR_ADD27:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C7]](s64) - ; SI-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY [[UV9]](s32) + ; SI-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY [[UV15]](s32) ; SI-NEXT: [[LSHR21:%[0-9]+]]:_(s32) = G_LSHR [[COPY22]], [[C]](s32) ; SI-NEXT: [[PTR_ADD28:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD27]], [[C1]](s64) ; SI-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY [[C2]](s32) @@ -7565,9 +7564,8 @@ body: | ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<8 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 - ; VI-NEXT: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[COPY1]](<8 x s32>) - ; VI-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), 
[[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](<4 x s32>) - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) + ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<8 x s32>) + ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 @@ -7588,7 +7586,7 @@ body: | ; VI-NEXT: G_STORE [[ANYEXT1]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; VI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) + ; VI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C]](s32) ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32) @@ -7605,7 +7603,7 @@ body: | ; VI-NEXT: G_STORE [[ANYEXT3]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1) ; VI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; VI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV4]](s32) + ; VI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) ; VI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32) ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY4]](s32) @@ -7622,7 +7620,7 @@ body: | ; VI-NEXT: G_STORE [[ANYEXT5]](s32), [[PTR_ADD10]](p1) :: (store (s8) into unknown-address + 11, addrspace 1) ; VI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; VI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; VI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[UV5]](s32) + ; VI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) ; VI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[COPY5]], [[C]](s32) ; VI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64) ; VI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[COPY5]](s32) @@ -7639,8 +7637,8 @@ body: | ; VI-NEXT: G_STORE [[ANYEXT7]](s32), [[PTR_ADD14]](p1) :: (store (s8) into unknown-address + 15, addrspace 1) ; VI-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; VI-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64) - ; VI-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](<4 x s32>) - ; VI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[UV6]](s32) + ; VI-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<8 x s32>) + ; VI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[UV12]](s32) ; VI-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[COPY6]], [[C]](s32) ; VI-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s64) ; VI-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[COPY6]](s32) @@ -7656,7 +7654,7 @@ body: | ; VI-NEXT: [[ANYEXT9:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR14]](s16) ; VI-NEXT: G_STORE [[ANYEXT9]](s32), [[PTR_ADD18]](p1) :: (store (s8) into unknown-address + 19, 
addrspace 1) ; VI-NEXT: [[PTR_ADD19:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C4]](s64) - ; VI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[UV7]](s32) + ; VI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[UV13]](s32) ; VI-NEXT: [[LSHR15:%[0-9]+]]:_(s32) = G_LSHR [[COPY7]], [[C]](s32) ; VI-NEXT: [[PTR_ADD20:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C1]](s64) ; VI-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[COPY7]](s32) @@ -7672,7 +7670,7 @@ body: | ; VI-NEXT: [[ANYEXT11:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR17]](s16) ; VI-NEXT: G_STORE [[ANYEXT11]](s32), [[PTR_ADD22]](p1) :: (store (s8) into unknown-address + 23, addrspace 1) ; VI-NEXT: [[PTR_ADD23:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C5]](s64) - ; VI-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[UV8]](s32) + ; VI-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[UV14]](s32) ; VI-NEXT: [[LSHR18:%[0-9]+]]:_(s32) = G_LSHR [[COPY8]], [[C]](s32) ; VI-NEXT: [[PTR_ADD24:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C1]](s64) ; VI-NEXT: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[COPY8]](s32) @@ -7688,7 +7686,7 @@ body: | ; VI-NEXT: [[ANYEXT13:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR20]](s16) ; VI-NEXT: G_STORE [[ANYEXT13]](s32), [[PTR_ADD26]](p1) :: (store (s8) into unknown-address + 27, addrspace 1) ; VI-NEXT: [[PTR_ADD27:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C6]](s64) - ; VI-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[UV9]](s32) + ; VI-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[UV15]](s32) ; VI-NEXT: [[LSHR21:%[0-9]+]]:_(s32) = G_LSHR [[COPY9]], [[C]](s32) ; VI-NEXT: [[PTR_ADD28:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD27]], [[C1]](s64) ; VI-NEXT: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) @@ -7730,9 +7728,8 @@ body: | ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<8 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 - ; SI-NEXT: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[COPY1]](<8 x s32>) - ; SI-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](<4 x s32>) - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) + ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<8 x s32>) + ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 @@ -7741,47 +7738,47 @@ body: | ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) ; SI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) + ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C]](s32) ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](s64) ; SI-NEXT: G_STORE [[COPY3]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1) ; SI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV4]](s32) + ; SI-NEXT: 
[[COPY4:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32) ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) ; SI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1) ; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD4]](p1) :: (store (s16) into unknown-address + 10, addrspace 1) ; SI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; SI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[UV5]](s32) + ; SI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY5]], [[C]](s32) ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C1]](s64) ; SI-NEXT: G_STORE [[COPY5]](s32), [[PTR_ADD5]](p1) :: (store (s16) into unknown-address + 12, addrspace 1) ; SI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD6]](p1) :: (store (s16) into unknown-address + 14, addrspace 1) ; SI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; SI-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](<4 x s32>) - ; SI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[UV6]](s32) + ; SI-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<8 x s32>) + ; SI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[UV12]](s32) ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[COPY6]], [[C]](s32) ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) ; SI-NEXT: G_STORE [[COPY6]](s32), [[PTR_ADD7]](p1) :: (store (s16) into unknown-address + 16, addrspace 1) ; SI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD8]](p1) :: (store (s16) into unknown-address + 18, addrspace 1) ; SI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) - ; SI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[UV7]](s32) + ; SI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[UV13]](s32) ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[COPY7]], [[C]](s32) ; SI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD9]], [[C1]](s64) ; SI-NEXT: G_STORE [[COPY7]](s32), [[PTR_ADD9]](p1) :: (store (s16) into unknown-address + 20, addrspace 1) ; SI-NEXT: G_STORE [[LSHR5]](s32), [[PTR_ADD10]](p1) :: (store (s16) into unknown-address + 22, addrspace 1) ; SI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64) - ; SI-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[UV8]](s32) + ; SI-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[UV14]](s32) ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[COPY8]], [[C]](s32) ; SI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64) ; SI-NEXT: G_STORE [[COPY8]](s32), [[PTR_ADD11]](p1) :: (store (s16) into unknown-address + 24, addrspace 1) ; SI-NEXT: G_STORE [[LSHR6]](s32), [[PTR_ADD12]](p1) :: (store (s16) into unknown-address + 26, addrspace 1) ; SI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64) - ; SI-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[UV9]](s32) + ; SI-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[UV15]](s32) ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[COPY9]], [[C]](s32) ; SI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD13]], [[C1]](s64) ; SI-NEXT: G_STORE [[COPY9]](s32), [[PTR_ADD13]](p1) :: (store (s16) into unknown-address + 28, 
addrspace 1) @@ -7803,9 +7800,8 @@ body: | ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<8 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 - ; VI-NEXT: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[COPY1]](<8 x s32>) - ; VI-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](<4 x s32>) - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) + ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<8 x s32>) + ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 @@ -7814,47 +7810,47 @@ body: | ; VI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) + ; VI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C]](s32) ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](s64) ; VI-NEXT: G_STORE [[COPY3]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) ; VI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1) ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; VI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV4]](s32) + ; VI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32) ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) ; VI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1) ; VI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD4]](p1) :: (store (s16) into unknown-address + 10, addrspace 1) ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; VI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[UV5]](s32) + ; VI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY5]], [[C]](s32) ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C1]](s64) ; VI-NEXT: G_STORE [[COPY5]](s32), [[PTR_ADD5]](p1) :: (store (s16) into unknown-address + 12, addrspace 1) ; VI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD6]](p1) :: (store (s16) into unknown-address + 14, addrspace 1) ; VI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; VI-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](<4 x s32>) - ; VI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[UV6]](s32) + ; VI-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<8 x s32>) + ; VI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY 
[[UV12]](s32) ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[COPY6]], [[C]](s32) ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) ; VI-NEXT: G_STORE [[COPY6]](s32), [[PTR_ADD7]](p1) :: (store (s16) into unknown-address + 16, addrspace 1) ; VI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD8]](p1) :: (store (s16) into unknown-address + 18, addrspace 1) ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) - ; VI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[UV7]](s32) + ; VI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[UV13]](s32) ; VI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[COPY7]], [[C]](s32) ; VI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD9]], [[C1]](s64) ; VI-NEXT: G_STORE [[COPY7]](s32), [[PTR_ADD9]](p1) :: (store (s16) into unknown-address + 20, addrspace 1) ; VI-NEXT: G_STORE [[LSHR5]](s32), [[PTR_ADD10]](p1) :: (store (s16) into unknown-address + 22, addrspace 1) ; VI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64) - ; VI-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[UV8]](s32) + ; VI-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[UV14]](s32) ; VI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[COPY8]], [[C]](s32) ; VI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64) ; VI-NEXT: G_STORE [[COPY8]](s32), [[PTR_ADD11]](p1) :: (store (s16) into unknown-address + 24, addrspace 1) ; VI-NEXT: G_STORE [[LSHR6]](s32), [[PTR_ADD12]](p1) :: (store (s16) into unknown-address + 26, addrspace 1) ; VI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64) - ; VI-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[UV9]](s32) + ; VI-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[UV15]](s32) ; VI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[COPY9]], [[C]](s32) ; VI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD13]], [[C1]](s64) ; VI-NEXT: G_STORE [[COPY9]](s32), [[PTR_ADD13]](p1) :: (store (s16) into unknown-address + 28, addrspace 1) @@ -8107,9 +8103,8 @@ body: | ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s256) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s32>) = G_BITCAST [[COPY1]](s256) - ; SI-NEXT: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s32>) - ; SI-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](<4 x s32>) - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) + ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<8 x s32>) + ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 @@ -8129,7 +8124,7 @@ body: | ; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) ; SI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) + ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32) ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) ; SI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C2]](s32) @@ -8145,7 
+8140,7 @@ body: | ; SI-NEXT: G_STORE [[LSHR5]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1) ; SI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; SI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[UV4]](s32) + ; SI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[COPY7]], [[C]](s32) ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) ; SI-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C2]](s32) @@ -8161,7 +8156,7 @@ body: | ; SI-NEXT: G_STORE [[LSHR8]](s32), [[PTR_ADD10]](p1) :: (store (s8) into unknown-address + 11, addrspace 1) ; SI-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; SI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64) - ; SI-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY [[UV5]](s32) + ; SI-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) ; SI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[COPY10]], [[C]](s32) ; SI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64) ; SI-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY [[C2]](s32) @@ -8177,8 +8172,8 @@ body: | ; SI-NEXT: G_STORE [[LSHR11]](s32), [[PTR_ADD14]](p1) :: (store (s8) into unknown-address + 15, addrspace 1) ; SI-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; SI-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64) - ; SI-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](<4 x s32>) - ; SI-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[UV6]](s32) + ; SI-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<8 x s32>) + ; SI-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[UV12]](s32) ; SI-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[COPY13]], [[C]](s32) ; SI-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s64) ; SI-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[C2]](s32) @@ -8193,7 +8188,7 @@ body: | ; SI-NEXT: G_STORE [[LSHR12]](s32), [[PTR_ADD16]](p1) :: (store (s8) into unknown-address + 18, addrspace 1) ; SI-NEXT: G_STORE [[LSHR14]](s32), [[PTR_ADD18]](p1) :: (store (s8) into unknown-address + 19, addrspace 1) ; SI-NEXT: [[PTR_ADD19:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C5]](s64) - ; SI-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[UV7]](s32) + ; SI-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[UV13]](s32) ; SI-NEXT: [[LSHR15:%[0-9]+]]:_(s32) = G_LSHR [[COPY16]], [[C]](s32) ; SI-NEXT: [[PTR_ADD20:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C1]](s64) ; SI-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[C2]](s32) @@ -8208,7 +8203,7 @@ body: | ; SI-NEXT: G_STORE [[LSHR15]](s32), [[PTR_ADD20]](p1) :: (store (s8) into unknown-address + 22, addrspace 1) ; SI-NEXT: G_STORE [[LSHR17]](s32), [[PTR_ADD22]](p1) :: (store (s8) into unknown-address + 23, addrspace 1) ; SI-NEXT: [[PTR_ADD23:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C6]](s64) - ; SI-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[UV8]](s32) + ; SI-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[UV14]](s32) ; SI-NEXT: [[LSHR18:%[0-9]+]]:_(s32) = G_LSHR [[COPY19]], [[C]](s32) ; SI-NEXT: [[PTR_ADD24:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C1]](s64) ; SI-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY [[C2]](s32) @@ -8223,7 +8218,7 @@ body: | ; SI-NEXT: G_STORE [[LSHR18]](s32), [[PTR_ADD24]](p1) :: (store (s8) into unknown-address + 26, 
addrspace 1) ; SI-NEXT: G_STORE [[LSHR20]](s32), [[PTR_ADD26]](p1) :: (store (s8) into unknown-address + 27, addrspace 1) ; SI-NEXT: [[PTR_ADD27:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C7]](s64) - ; SI-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY [[UV9]](s32) + ; SI-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY [[UV15]](s32) ; SI-NEXT: [[LSHR21:%[0-9]+]]:_(s32) = G_LSHR [[COPY22]], [[C]](s32) ; SI-NEXT: [[PTR_ADD28:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD27]], [[C1]](s64) ; SI-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY [[C2]](s32) @@ -8256,9 +8251,8 @@ body: | ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s256) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s32>) = G_BITCAST [[COPY1]](s256) - ; VI-NEXT: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s32>) - ; VI-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](<4 x s32>) - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) + ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<8 x s32>) + ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 @@ -8279,7 +8273,7 @@ body: | ; VI-NEXT: G_STORE [[ANYEXT1]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 3, addrspace 1) ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; VI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) + ; VI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C]](s32) ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32) @@ -8296,7 +8290,7 @@ body: | ; VI-NEXT: G_STORE [[ANYEXT3]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1) ; VI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; VI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV4]](s32) + ; VI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) ; VI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32) ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY4]](s32) @@ -8313,7 +8307,7 @@ body: | ; VI-NEXT: G_STORE [[ANYEXT5]](s32), [[PTR_ADD10]](p1) :: (store (s8) into unknown-address + 11, addrspace 1) ; VI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; VI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; VI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[UV5]](s32) + ; VI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) ; VI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[COPY5]], [[C]](s32) ; VI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64) ; VI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[COPY5]](s32) @@ -8330,8 +8324,8 @@ body: | ; VI-NEXT: G_STORE [[ANYEXT7]](s32), [[PTR_ADD14]](p1) :: (store (s8) into unknown-address + 15, addrspace 1) ; VI-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; VI-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p1) = 
G_PTR_ADD [[COPY]], [[C7]](s64) - ; VI-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](<4 x s32>) - ; VI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[UV6]](s32) + ; VI-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<8 x s32>) + ; VI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[UV12]](s32) ; VI-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[COPY6]], [[C]](s32) ; VI-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s64) ; VI-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[COPY6]](s32) @@ -8347,7 +8341,7 @@ body: | ; VI-NEXT: [[ANYEXT9:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR14]](s16) ; VI-NEXT: G_STORE [[ANYEXT9]](s32), [[PTR_ADD18]](p1) :: (store (s8) into unknown-address + 19, addrspace 1) ; VI-NEXT: [[PTR_ADD19:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C4]](s64) - ; VI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[UV7]](s32) + ; VI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[UV13]](s32) ; VI-NEXT: [[LSHR15:%[0-9]+]]:_(s32) = G_LSHR [[COPY7]], [[C]](s32) ; VI-NEXT: [[PTR_ADD20:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD19]], [[C1]](s64) ; VI-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[COPY7]](s32) @@ -8363,7 +8357,7 @@ body: | ; VI-NEXT: [[ANYEXT11:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR17]](s16) ; VI-NEXT: G_STORE [[ANYEXT11]](s32), [[PTR_ADD22]](p1) :: (store (s8) into unknown-address + 23, addrspace 1) ; VI-NEXT: [[PTR_ADD23:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C5]](s64) - ; VI-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[UV8]](s32) + ; VI-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[UV14]](s32) ; VI-NEXT: [[LSHR18:%[0-9]+]]:_(s32) = G_LSHR [[COPY8]], [[C]](s32) ; VI-NEXT: [[PTR_ADD24:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD23]], [[C1]](s64) ; VI-NEXT: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[COPY8]](s32) @@ -8379,7 +8373,7 @@ body: | ; VI-NEXT: [[ANYEXT13:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR20]](s16) ; VI-NEXT: G_STORE [[ANYEXT13]](s32), [[PTR_ADD26]](p1) :: (store (s8) into unknown-address + 27, addrspace 1) ; VI-NEXT: [[PTR_ADD27:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C6]](s64) - ; VI-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[UV9]](s32) + ; VI-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[UV15]](s32) ; VI-NEXT: [[LSHR21:%[0-9]+]]:_(s32) = G_LSHR [[COPY9]], [[C]](s32) ; VI-NEXT: [[PTR_ADD28:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD27]], [[C1]](s64) ; VI-NEXT: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) @@ -8423,9 +8417,8 @@ body: | ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s256) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s32>) = G_BITCAST [[COPY1]](s256) - ; SI-NEXT: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s32>) - ; SI-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](<4 x s32>) - ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) + ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<8 x s32>) + ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR 
[[COPY2]], [[C]](s32) ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 @@ -8434,47 +8427,47 @@ body: | ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) ; SI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) + ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C]](s32) ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](s64) ; SI-NEXT: G_STORE [[COPY3]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1) ; SI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV4]](s32) + ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32) ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) ; SI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1) ; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD4]](p1) :: (store (s16) into unknown-address + 10, addrspace 1) ; SI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; SI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[UV5]](s32) + ; SI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY5]], [[C]](s32) ; SI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C1]](s64) ; SI-NEXT: G_STORE [[COPY5]](s32), [[PTR_ADD5]](p1) :: (store (s16) into unknown-address + 12, addrspace 1) ; SI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD6]](p1) :: (store (s16) into unknown-address + 14, addrspace 1) ; SI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; SI-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](<4 x s32>) - ; SI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[UV6]](s32) + ; SI-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<8 x s32>) + ; SI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[UV12]](s32) ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[COPY6]], [[C]](s32) ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) ; SI-NEXT: G_STORE [[COPY6]](s32), [[PTR_ADD7]](p1) :: (store (s16) into unknown-address + 16, addrspace 1) ; SI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD8]](p1) :: (store (s16) into unknown-address + 18, addrspace 1) ; SI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) - ; SI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[UV7]](s32) + ; SI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[UV13]](s32) ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[COPY7]], [[C]](s32) ; SI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD9]], [[C1]](s64) ; SI-NEXT: G_STORE [[COPY7]](s32), [[PTR_ADD9]](p1) :: (store (s16) into unknown-address + 20, addrspace 1) ; SI-NEXT: G_STORE [[LSHR5]](s32), [[PTR_ADD10]](p1) :: (store (s16) into unknown-address + 22, 
addrspace 1) ; SI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64) - ; SI-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[UV8]](s32) + ; SI-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[UV14]](s32) ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[COPY8]], [[C]](s32) ; SI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64) ; SI-NEXT: G_STORE [[COPY8]](s32), [[PTR_ADD11]](p1) :: (store (s16) into unknown-address + 24, addrspace 1) ; SI-NEXT: G_STORE [[LSHR6]](s32), [[PTR_ADD12]](p1) :: (store (s16) into unknown-address + 26, addrspace 1) ; SI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64) - ; SI-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[UV9]](s32) + ; SI-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[UV15]](s32) ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[COPY9]], [[C]](s32) ; SI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD13]], [[C1]](s64) ; SI-NEXT: G_STORE [[COPY9]](s32), [[PTR_ADD13]](p1) :: (store (s16) into unknown-address + 28, addrspace 1) @@ -8498,9 +8491,8 @@ body: | ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s256) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s32>) = G_BITCAST [[COPY1]](s256) - ; VI-NEXT: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s32>) - ; VI-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](<4 x s32>) - ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) + ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<8 x s32>) + ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 @@ -8509,47 +8501,47 @@ body: | ; VI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 2, addrspace 1) ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) + ; VI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C]](s32) ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C1]](s64) ; VI-NEXT: G_STORE [[COPY3]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 4, addrspace 1) ; VI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1) ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; VI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV4]](s32) + ; VI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32) ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) ; VI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1) ; VI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD4]](p1) :: (store (s16) into unknown-address + 10, addrspace 1) ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; VI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = 
COPY [[UV5]](s32) + ; VI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY5]], [[C]](s32) ; VI-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD5]], [[C1]](s64) ; VI-NEXT: G_STORE [[COPY5]](s32), [[PTR_ADD5]](p1) :: (store (s16) into unknown-address + 12, addrspace 1) ; VI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD6]](p1) :: (store (s16) into unknown-address + 14, addrspace 1) ; VI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; VI-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](<4 x s32>) - ; VI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[UV6]](s32) + ; VI-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<8 x s32>) + ; VI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[UV12]](s32) ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[COPY6]], [[C]](s32) ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) ; VI-NEXT: G_STORE [[COPY6]](s32), [[PTR_ADD7]](p1) :: (store (s16) into unknown-address + 16, addrspace 1) ; VI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD8]](p1) :: (store (s16) into unknown-address + 18, addrspace 1) ; VI-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) - ; VI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[UV7]](s32) + ; VI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[UV13]](s32) ; VI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[COPY7]], [[C]](s32) ; VI-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD9]], [[C1]](s64) ; VI-NEXT: G_STORE [[COPY7]](s32), [[PTR_ADD9]](p1) :: (store (s16) into unknown-address + 20, addrspace 1) ; VI-NEXT: G_STORE [[LSHR5]](s32), [[PTR_ADD10]](p1) :: (store (s16) into unknown-address + 22, addrspace 1) ; VI-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s64) - ; VI-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[UV8]](s32) + ; VI-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[UV14]](s32) ; VI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[COPY8]], [[C]](s32) ; VI-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s64) ; VI-NEXT: G_STORE [[COPY8]](s32), [[PTR_ADD11]](p1) :: (store (s16) into unknown-address + 24, addrspace 1) ; VI-NEXT: G_STORE [[LSHR6]](s32), [[PTR_ADD12]](p1) :: (store (s16) into unknown-address + 26, addrspace 1) ; VI-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64) - ; VI-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[UV9]](s32) + ; VI-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[UV15]](s32) ; VI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[COPY9]], [[C]](s32) ; VI-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD13]], [[C1]](s64) ; VI-NEXT: G_STORE [[COPY9]](s32), [[PTR_ADD13]](p1) :: (store (s16) into unknown-address + 28, addrspace 1) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-trunc.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-trunc.mir index 5205386c8ea71..282550830442c 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-trunc.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-trunc.mir @@ -151,21 +151,19 @@ body: | ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s64>), 
[[UV1:%[0-9]+]]:_(<2 x s64>) = G_UNMERGE_VALUES [[COPY]](<4 x s64>) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV]](<2 x s64>) - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[UV2]](s64) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<4 x s64>) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[UV]](s64) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC]], [[C]] - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[UV3]](s64) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[UV1]](s64) ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[TRUNC1]], [[C]] ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV1]](<2 x s64>) - ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s32) = G_TRUNC [[UV4]](s64) + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s32) = G_TRUNC [[UV2]](s64) ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[TRUNC2]], [[C]] - ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s32) = G_TRUNC [[UV5]](s64) + ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s32) = G_TRUNC [[UV3]](s64) ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[TRUNC3]], [[C]] ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C1]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] From 1cdcec5884fb99d921800197e0863329de6b10ee Mon Sep 17 00:00:00 2001 From: yronglin Date: Wed, 25 Sep 2024 14:09:06 +0800 Subject: [PATCH 101/212] [clang][bytecode] Handle vector comma op (#109827) Previously, we did not handle the comma operator for vector operands; after this patch, we handle the comma operator for vector operands in the common way. Signed-off-by: yronglin --- clang/lib/AST/ByteCode/Compiler.cpp | 8 +++++--- clang/test/Sema/fp16vec-sema.c | 1 + 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/clang/lib/AST/ByteCode/Compiler.cpp b/clang/lib/AST/ByteCode/Compiler.cpp index 785918846976d..e54b6568d7060 100644 --- a/clang/lib/AST/ByteCode/Compiler.cpp +++ b/clang/lib/AST/ByteCode/Compiler.cpp @@ -725,9 +725,7 @@ bool Compiler::VisitParenExpr(const ParenExpr *E) { template bool Compiler::VisitBinaryOperator(const BinaryOperator *BO) { // Need short-circuiting for these.
- if (BO->getType()->isVectorType()) - return this->VisitVectorBinOp(BO); - if (BO->isLogicalOp()) + if (BO->isLogicalOp() && !BO->getType()->isVectorType()) return this->VisitLogicalBinOp(BO); const Expr *LHS = BO->getLHS(); @@ -746,6 +744,8 @@ bool Compiler::VisitBinaryOperator(const BinaryOperator *BO) { if (BO->getType()->isAnyComplexType()) return this->VisitComplexBinOp(BO); + if (BO->getType()->isVectorType()) + return this->VisitVectorBinOp(BO); if ((LHS->getType()->isAnyComplexType() || RHS->getType()->isAnyComplexType()) && BO->isComparisonOp()) @@ -1264,6 +1264,8 @@ bool Compiler::VisitComplexBinOp(const BinaryOperator *E) { template bool Compiler::VisitVectorBinOp(const BinaryOperator *E) { + assert(!E->isCommaOp() && + "Comma op should be handled in VisitBinaryOperator"); assert(E->getType()->isVectorType()); assert(E->getLHS()->getType()->isVectorType()); assert(E->getRHS()->getType()->isVectorType()); diff --git a/clang/test/Sema/fp16vec-sema.c b/clang/test/Sema/fp16vec-sema.c index 80936cd622f7c..89f01c6dcf47b 100644 --- a/clang/test/Sema/fp16vec-sema.c +++ b/clang/test/Sema/fp16vec-sema.c @@ -1,4 +1,5 @@ // RUN: %clang_cc1 -fsyntax-only -Wno-unused-value -verify %s +// RUN: %clang_cc1 -fexperimental-new-constant-interpreter -fsyntax-only -Wno-unused-value -verify %s typedef __fp16 half4 __attribute__ ((vector_size (8))); typedef float float4 __attribute__ ((vector_size (16))); From 832297ca32f4fba75fea68efc93a3e6c222c89ff Mon Sep 17 00:00:00 2001 From: Richard Smith Date: Tue, 24 Sep 2024 23:22:20 -0700 Subject: [PATCH 102/212] Fix compatibility version in test (#97128) The mangling compatibility being tested here changed between Clang 17 and 18, not between 16 and 17, so change the "old" version to 17. As requested by @ahatanak in [post-commit review](https://github.com/llvm/llvm-project/commit/7421dd55a16f18919a568499e4c0888ed3a5e8b5#commitcomment-128651446). 
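For context, here is a minimal sketch of the kind of declaration whose Itanium mangling depends on this ABI compatibility setting. The declarations and comments below are illustrative assumptions for this note, not code taken from the test; only the -fclang-abi-compat=17 flag is taken from the updated RUN lines:

// Illustrative only: a trivially-true concept constraining a function template.
// Current Clang encodes the constraint in the mangled name, while compiling
// with -fclang-abi-compat=17 is assumed to keep the older mangling that omits it.
template <typename T> concept C = true;
template <C T> void f(T) {}
template void f<int>(int); // explicit instantiation forces a symbol to be emitted

Building this as C++20 with and without -fclang-abi-compat=17 and comparing the emitted symbols shows the mangling difference that the renamed CHECK prefixes below pin down.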
--- clang/test/CodeGenCXX/mangle-concept.cpp | 52 ++++++++++++------------ 1 file changed, 26 insertions(+), 26 deletions(-) diff --git a/clang/test/CodeGenCXX/mangle-concept.cpp b/clang/test/CodeGenCXX/mangle-concept.cpp index e9c46d87635ab..91dc1b0e688e0 100644 --- a/clang/test/CodeGenCXX/mangle-concept.cpp +++ b/clang/test/CodeGenCXX/mangle-concept.cpp @@ -1,5 +1,5 @@ // RUN: %clang_cc1 -verify -std=c++20 -emit-llvm -triple %itanium_abi_triple -o - %s -fclang-abi-compat=latest | FileCheck %s -// RUN: %clang_cc1 -verify -std=c++20 -emit-llvm -triple %itanium_abi_triple -o - %s -fclang-abi-compat=16 | FileCheck %s --check-prefix=CLANG16 +// RUN: %clang_cc1 -verify -std=c++20 -emit-llvm -triple %itanium_abi_triple -o - %s -fclang-abi-compat=17 | FileCheck %s --check-prefix=CLANG17 // expected-no-diagnostics namespace test1 { @@ -8,7 +8,7 @@ template concept C = true; template S> f0() { return S>{}; } template S> f0<>(); // CHECK: @_ZN5test12f0IiEENS_1SIX1CIT_EEEEv( -// CLANG16: @_ZN5test12f0IiEENS_1SIL_ZNS_1CIT_EEEEEv( +// CLANG17: @_ZN5test12f0IiEENS_1SIL_ZNS_1CIT_EEEEEv( } template struct S {}; @@ -18,12 +18,12 @@ template concept D = true; template S> f0a() { return S>{}; } template S> f0a<>(); // CHECK: @_Z3f0aIiE1SIXsr5test1E1CIT_EEEv( -// CLANG16: @_Z3f0aIiE1SIL_ZN5test11CIT_EEEEv( +// CLANG17: @_Z3f0aIiE1SIL_ZN5test11CIT_EEEEv( template S> f0() { return S>{}; } template S> f0<>(); // CHECK: @_Z2f0IiE1SIX1CIT_EEEv( -// CLANG16: @_Z2f0IiE1SIL_Z1CIT_EEEv( +// CLANG17: @_Z2f0IiE1SIL_Z1CIT_EEEv( template concept True = true; @@ -56,25 +56,25 @@ namespace test2 { // CHECK-LABEL: define {{.*}}@{{.*}}test2{{.*}}use void use() { // CHECK: call {{.*}}@_ZN5test21AIiEF1fEzQ4TrueIT_E( - // CLANG16: call {{.*}}@_ZN5test21fEz( + // CLANG17: call {{.*}}@_ZN5test21fEz( f(ai); // CHECK: call {{.*}}@_ZN5test2F1gIvEEvzQaa4TrueIT_E4TrueITL0__E( - // CLANG16: call {{.*}}@_ZN5test21gIvEEvz( + // CLANG17: call {{.*}}@_ZN5test21gIvEEvz( g(ai); // CHECK: call {{.*}}@_ZN5test21hIvEEvzQ4TrueITL0__E( - // CLANG16: call {{.*}}@_ZN5test21hIvEEvz( + // CLANG17: call {{.*}}@_ZN5test21hIvEEvz( h(ai); // CHECK: call {{.*}}@_ZN5test2F1iIvQaa4TrueIT_E4TrueITL0__EEEvz( - // CLANG16: call {{.*}}@_ZN5test21iIvEEvz( + // CLANG17: call {{.*}}@_ZN5test21iIvEEvz( i(ai); // CHECK: call {{.*}}@_ZN5test21jIvQ4TrueITL0__EEEvz( - // CLANG16: call {{.*}}@_ZN5test21jIvEEvz( + // CLANG17: call {{.*}}@_ZN5test21jIvEEvz( j(ai); // CHECK: call {{.*}}@_ZN5test2F1kITk4TruevQ4TrueIT_EEEvz( - // CLANG16: call {{.*}}@_ZN5test21kIvEEvz( + // CLANG17: call {{.*}}@_ZN5test21kIvEEvz( k(ai); // CHECK: call {{.*}}@_ZN5test21lITk4TruevEEvz( - // CLANG16: call {{.*}}@_ZN5test21lIvEEvz( + // CLANG17: call {{.*}}@_ZN5test21lIvEEvz( l(ai); } } @@ -84,38 +84,38 @@ namespace test3 { template void d() {} template void d<0>(); // CHECK: define {{.*}}@_ZN5test31dITnDaLi0EEEvv( - // CLANG16: define {{.*}}@_ZN5test31dILi0EEEvv( + // CLANG17: define {{.*}}@_ZN5test31dILi0EEEvv( template void e() {} template void e<0>(); // CHECK: define {{.*}}@_ZN5test31eITnDcLi0EEEvv( - // CLANG16: define {{.*}}@_ZN5test31eILi0EEEvv( + // CLANG17: define {{.*}}@_ZN5test31eILi0EEEvv( // Constrained auto. 
template void f() {} template void f<0>(); // CHECK: define {{.*}}@_ZN5test31fITnDk1CLi0EEEvv( - // CLANG16: define {{.*}}@_ZN5test31fILi0EEEvv( + // CLANG17: define {{.*}}@_ZN5test31fILi0EEEvv( template auto> void g() {} template void g<0>(); // CHECK: define {{.*}}@_ZN5test31gITnDk1DIiELi0EEEvv( - // CLANG16: define {{.*}}@_ZN5test31gILi0EEEvv( + // CLANG17: define {{.*}}@_ZN5test31gILi0EEEvv( template auto> void h() {} template void h(); // CHECK: define {{.*}}@_ZN5test31hIiTnDk1DIT_ELi0EEEvv( - // CLANG16: define {{.*}}@_ZN5test31hIiLi0EEEvv( + // CLANG17: define {{.*}}@_ZN5test31hIiLi0EEEvv( template void i(decltype(new C auto(T()))) {} template void i(int*); // CHECK: define {{.*}}@_ZN5test31iIiEEvDTnw_Dk1CpicvT__EEE( - // CLANG16: define {{.*}}@_ZN5test31iIiEEvDTnw_DapicvT__EEE( + // CLANG17: define {{.*}}@_ZN5test31iIiEEvDTnw_DapicvT__EEE( template void j(decltype(new C decltype(auto)(T()))) {} template void j(int*); // CHECK: define {{.*}}@_ZN5test31jIiEEvDTnw_DK1CpicvT__EEE( - // CLANG16: define {{.*}}@_ZN5test31jIiEEvDTnw_DcpicvT__EEE( + // CLANG17: define {{.*}}@_ZN5test31jIiEEvDTnw_DcpicvT__EEE( } namespace test4 { @@ -123,12 +123,12 @@ namespace test4 { template void f() {} template void f(); // CHECK: define {{.*}}@_ZN5test41fITk1CiEEvv( - // CLANG16: define {{.*}}@_ZN5test41fIiEEvv( + // CLANG17: define {{.*}}@_ZN5test41fIiEEvv( template> void g() {} template void g(); // CHECK: define {{.*}}@_ZN5test41gITk1DIiEiEEvv( - // CLANG16: define {{.*}}@_ZN5test41gIiEEvv( + // CLANG17: define {{.*}}@_ZN5test41gIiEEvv( } namespace test5 { @@ -175,18 +175,18 @@ namespace test5 { template typename> void p() {} // CHECK: define {{.*}}@_ZN5test51pINS_1AEEEvv( - // CLANG16: define {{.*}}@_ZN5test51pINS_1AEEEvv( + // CLANG17: define {{.*}}@_ZN5test51pINS_1AEEEvv( template void p(); // CHECK: define {{.*}}@_ZN5test51pITtTpTyENS_1BEEEvv( - // CLANG16: define {{.*}}@_ZN5test51pINS_1BEEEvv( + // CLANG17: define {{.*}}@_ZN5test51pINS_1BEEEvv( template void p(); template typename> void q() {} // CHECK: define {{.*}}@_ZN5test51qITtTyTyENS_1AEEEvv( - // CLANG16: define {{.*}}@_ZN5test51qINS_1AEEEvv( + // CLANG17: define {{.*}}@_ZN5test51qINS_1AEEEvv( template void q(); // CHECK: define {{.*}}@_ZN5test51qINS_1BEEEvv( - // CLANG16: define {{.*}}@_ZN5test51qINS_1BEEEvv( + // CLANG17: define {{.*}}@_ZN5test51qINS_1BEEEvv( template void q(); } @@ -194,13 +194,13 @@ namespace test6 { // Abbreviated function templates. 
void f(C auto) {} // CHECK: define {{.*}}@_ZN5test61fITk1CiEEvT_( - // CLANG16: define {{.*}}@_ZN5test61fIiEEvT_( + // CLANG17: define {{.*}}@_ZN5test61fIiEEvT_( template void f(int); template void g(D auto) {} // CHECK: define {{.*}}@_ZN5test61gIiTk1DIT_EiEEvT0_( - // CLANG16: define {{.*}}@_ZN5test61gIiiEEvT0_( + // CLANG17: define {{.*}}@_ZN5test61gIiiEEvT0_( template void g(int); } From c08a4376442aad58a1c58d3ff95fa74417d24353 Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Tue, 24 Sep 2024 23:24:31 -0700 Subject: [PATCH 103/212] [GlobalISel] Fix a warning This patch fixes: llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h:1097:7: error: unannotated fall-through between switch labels [-Werror,-Wimplicit-fallthrough] --- .../llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h b/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h index 0dc827b9ad43e..471a7f70dd546 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h @@ -1094,6 +1094,7 @@ class LegalizationArtifactCombiner { if (!OpTy.isVector() || !LI.isLegal({TargetOpcode::G_UNMERGE_VALUES, {DestTy, SrcUnmergeSrcTy}})) return false; + break; case LegalizeActions::Lower: case LegalizeActions::Unsupported: break; From a6ea0b0d2973d3d94f9ed7aac6db9ca722664774 Mon Sep 17 00:00:00 2001 From: David CARLIER Date: Wed, 25 Sep 2024 07:28:07 +0100 Subject: [PATCH 104/212] [compiler-rt] Intercept macOS's freadlink call. (#83679) Available since macOS Ventura. --- .../sanitizer_common_interceptors.inc | 19 ++++++++++++ .../sanitizer_platform_interceptors.h | 1 + .../TestCases/Darwin/freadlink.c | 29 +++++++++++++++++++ 3 files changed, 49 insertions(+) create mode 100644 compiler-rt/test/sanitizer_common/TestCases/Darwin/freadlink.c diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc b/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc index b382e7a61950c..7898af4a335e3 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc +++ b/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc @@ -10342,6 +10342,24 @@ INTERCEPTOR(SSIZE_T, pwritev2, int fd, __sanitizer_iovec *iov, int iovcnt, #define INIT_PWRITEV2 #endif +#if SANITIZER_INTERCEPT_FREADLINK +INTERCEPTOR(SSIZE_T, freadlink, int fd, char *buf, SIZE_T bufsiz) { + void *ctx; + COMMON_INTERCEPTOR_ENTER(ctx, freadlink, fd, buf, bufsiz); + COMMON_INTERCEPTOR_FD_ACCESS(ctx, fd); + SSIZE_T res = REAL(freadlink)(fd, buf, bufsiz); + if (res > 0) + COMMON_INTERCEPTOR_WRITE_RANGE(ctx, buf, res); + if (res >= 0 && fd > 0) + COMMON_INTERCEPTOR_FD_ACQUIRE(ctx, fd); + return res; +} + +# define INIT_FREADLINK COMMON_INTERCEPT_FUNCTION(freadlink) +#else +# define INIT_FREADLINK +#endif + +#include "sanitizer_common_interceptors_netbsd_compat.inc" namespace __sanitizer { @@ -10663,6 +10681,7 @@ static void InitializeCommonInterceptors() { INIT_CPUSET_GETAFFINITY; INIT_PREADV2; INIT_PWRITEV2; + INIT_FREADLINK; INIT___PRINTF_CHK; } diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_platform_interceptors.h b/compiler-rt/lib/sanitizer_common/sanitizer_platform_interceptors.h index e71a6bcd6a837..05fa7e63268f2 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_platform_interceptors.h +++ b/compiler-rt/lib/sanitizer_common/sanitizer_platform_interceptors.h @@ -606,6 +606,7 @@
// FIXME: also available from musl 1.2.5 #define SANITIZER_INTERCEPT_PREADV2 (SI_LINUX && __GLIBC_PREREQ(2, 26)) #define SANITIZER_INTERCEPT_PWRITEV2 (SI_LINUX && __GLIBC_PREREQ(2, 26)) +#define SANITIZER_INTERCEPT_FREADLINK SI_MAC // This macro gives a way for downstream users to override the above // interceptor macros irrespective of the platform they are on. They have diff --git a/compiler-rt/test/sanitizer_common/TestCases/Darwin/freadlink.c b/compiler-rt/test/sanitizer_common/TestCases/Darwin/freadlink.c new file mode 100644 index 0000000000000..53658cdb66aa3 --- /dev/null +++ b/compiler-rt/test/sanitizer_common/TestCases/Darwin/freadlink.c @@ -0,0 +1,29 @@ +// RUN: %clang -O0 %s -o %t && %run %t + +#include +#include +#include +#include +#include +#include +#include + +int main(int argc, char **argv) { + char symlink_path[PATH_MAX]; + snprintf(symlink_path, sizeof(symlink_path), "%s_%d.symlink", argv[0], + getpid()); + remove(symlink_path); + int res = symlink(argv[0], symlink_path); + assert(!res); + + int fd; + char readlink_path[PATH_MAX]; + fd = open(symlink_path, O_RDONLY); + ssize_t res2 = freadlink(fd, readlink_path, sizeof(readlink_path)); + assert(res2 >= 0); + readlink_path[res2] = '\0'; + assert(!strcmp(readlink_path, argv[0])); + close(fd); + + return 0; +} From df6822f4eb81638e724f09b948b83fe678412d25 Mon Sep 17 00:00:00 2001 From: Pavel Labath Date: Wed, 25 Sep 2024 08:31:42 +0200 Subject: [PATCH 105/212] [lldb] Fix error reporting in SBTarget::ReadMemory (#109764) The function should use the by-ref SBError argument instead of creating a new one. This code has been here since ~forever, and was probably copied from methods which return an SBError result (where one needs to create a local variable). --- lldb/source/API/SBTarget.cpp | 5 ++--- lldb/test/API/python_api/target/TestTargetAPI.py | 5 +++++ 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/lldb/source/API/SBTarget.cpp b/lldb/source/API/SBTarget.cpp index 1c1f7e2a03def..d5017ad6bff16 100644 --- a/lldb/source/API/SBTarget.cpp +++ b/lldb/source/API/SBTarget.cpp @@ -662,15 +662,14 @@ size_t SBTarget::ReadMemory(const SBAddress addr, void *buf, size_t size, lldb::SBError &error) { LLDB_INSTRUMENT_VA(this, addr, buf, size, error); - SBError sb_error; size_t bytes_read = 0; TargetSP target_sp(GetSP()); if (target_sp) { std::lock_guard guard(target_sp->GetAPIMutex()); bytes_read = - target_sp->ReadMemory(addr.ref(), buf, size, sb_error.ref(), true); + target_sp->ReadMemory(addr.ref(), buf, size, error.ref(), true); } else { - sb_error.SetErrorString("invalid target"); + error.SetErrorString("invalid target"); } return bytes_read; diff --git a/lldb/test/API/python_api/target/TestTargetAPI.py b/lldb/test/API/python_api/target/TestTargetAPI.py index 2e8d6a5b1e53f..155a25b576b03 100644 --- a/lldb/test/API/python_api/target/TestTargetAPI.py +++ b/lldb/test/API/python_api/target/TestTargetAPI.py @@ -153,6 +153,11 @@ def test_read_memory(self): self.assertSuccess(error, "Make sure memory read succeeded") self.assertEqual(len(content), 1) + # Make sure reading from 0x0 fails + sb_addr = lldb.SBAddress(0, target) + self.assertIsNone(target.ReadMemory(sb_addr, 1, error)) + self.assertTrue(error.Fail()) + @skipIfWindows # stdio manipulation unsupported on Windows @skipIfRemote # stdio manipulation unsupported on remote iOS devices @skipIf(oslist=["linux"], archs=["arm", "aarch64"]) From 7c825f0d6a6db097e364bd747f8421518d34093a Mon Sep 17 00:00:00 2001 From: LLVM GN Syncbot Date: Wed, 25 Sep 2024 06:37:47 +0000 Subject: 
[PATCH 106/212] [gn build] Port fa824dc0dd96 --- llvm/utils/gn/secondary/llvm/lib/IR/BUILD.gn | 1 + llvm/utils/gn/secondary/llvm/unittests/IR/BUILD.gn | 1 + 2 files changed, 2 insertions(+) diff --git a/llvm/utils/gn/secondary/llvm/lib/IR/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/IR/BUILD.gn index 0f6e345b9d175..3fecf9477ee76 100644 --- a/llvm/utils/gn/secondary/llvm/lib/IR/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/lib/IR/BUILD.gn @@ -21,6 +21,7 @@ static_library("IR") { "BasicBlock.cpp", "BuiltinGCs.cpp", "Comdat.cpp", + "ConstantFPRange.cpp", "ConstantFold.cpp", "ConstantRange.cpp", "ConstantRangeList.cpp", diff --git a/llvm/utils/gn/secondary/llvm/unittests/IR/BUILD.gn b/llvm/utils/gn/secondary/llvm/unittests/IR/BUILD.gn index ba897a679db46..b19d54d7ed92f 100644 --- a/llvm/utils/gn/secondary/llvm/unittests/IR/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/unittests/IR/BUILD.gn @@ -17,6 +17,7 @@ unittest("IRTests") { "BasicBlockDbgInfoTest.cpp", "BasicBlockTest.cpp", "CFGBuilder.cpp", + "ConstantFPRangeTest.cpp", "ConstantRangeListTest.cpp", "ConstantRangeTest.cpp", "ConstantsTest.cpp", From d7c6e943836368c17467f2d0dcf1e8caddbd9b6d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Juan=20Manuel=20Martinez=20Caama=C3=B1o?= Date: Wed, 25 Sep 2024 08:57:18 +0200 Subject: [PATCH 107/212] [AMDGPU][StructurizeCFG] pre-commit tests: maintain branch_weights metadata (#109812) --- .../structurizer-keep-perf-md.ll | 72 +++++++++++++++++++ 1 file changed, 72 insertions(+) create mode 100644 llvm/test/Transforms/StructurizeCFG/structurizer-keep-perf-md.ll diff --git a/llvm/test/Transforms/StructurizeCFG/structurizer-keep-perf-md.ll b/llvm/test/Transforms/StructurizeCFG/structurizer-keep-perf-md.ll new file mode 100644 index 0000000000000..862c50c6183f1 --- /dev/null +++ b/llvm/test/Transforms/StructurizeCFG/structurizer-keep-perf-md.ll @@ -0,0 +1,72 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -S -mtriple=amdgcn-- -passes=structurizecfg %s | FileCheck -check-prefix=OPT %s + +define amdgpu_ps i32 @if_else(i32 %0) { +; OPT-LABEL: define amdgpu_ps i32 @if_else( +; OPT-SAME: i32 [[TMP0:%.*]]) { +; OPT-NEXT: [[C:%.*]] = icmp ne i32 [[TMP0]], 0 +; OPT-NEXT: br i1 [[C]], label %[[FALSE:.*]], label %[[FLOW:.*]] +; OPT: [[FLOW]]: +; OPT-NEXT: [[TMP2:%.*]] = phi i32 [ 33, %[[FALSE]] ], [ undef, [[TMP1:%.*]] ] +; OPT-NEXT: [[TMP3:%.*]] = phi i1 [ false, %[[FALSE]] ], [ true, [[TMP1]] ] +; OPT-NEXT: br i1 [[TMP3]], label %[[TRUE:.*]], label %[[EXIT:.*]] +; OPT: [[TRUE]]: +; OPT-NEXT: br label %[[EXIT]] +; OPT: [[FALSE]]: +; OPT-NEXT: br label %[[FLOW]] +; OPT: [[EXIT]]: +; OPT-NEXT: [[RET:%.*]] = phi i32 [ [[TMP2]], %[[FLOW]] ], [ 42, %[[TRUE]] ] +; OPT-NEXT: ret i32 [[RET]] +; + %c = icmp eq i32 %0, 0 + br i1 %c, label %true, label %false, !prof !0 + +true: ; preds = %1 + br label %exit + +false: ; preds = %1 + br label %exit + +exit: ; preds = %false, %true + %ret = phi i32 [ 42, %true ], [ 33, %false ] + ret i32 %ret +} + +define amdgpu_ps void @loop_if_break(i32 %n) { +; OPT-LABEL: define amdgpu_ps void @loop_if_break( +; OPT-SAME: i32 [[N:%.*]]) { +; OPT-NEXT: [[ENTRY:.*]]: +; OPT-NEXT: br label %[[LOOP:.*]] +; OPT: [[LOOP]]: +; OPT-NEXT: [[I:%.*]] = phi i32 [ [[N]], %[[ENTRY]] ], [ [[TMP0:%.*]], %[[FLOW:.*]] ] +; OPT-NEXT: [[C:%.*]] = icmp ugt i32 [[I]], 0 +; OPT-NEXT: br i1 [[C]], label %[[LOOP_BODY:.*]], label %[[FLOW]] +; OPT: [[LOOP_BODY]]: +; OPT-NEXT: [[I_NEXT:%.*]] = sub i32 [[I]], 1 +; OPT-NEXT: br label %[[FLOW]] +; OPT: [[FLOW]]: +; 
OPT-NEXT: [[TMP0]] = phi i32 [ [[I_NEXT]], %[[LOOP_BODY]] ], [ undef, %[[LOOP]] ] +; OPT-NEXT: [[TMP1:%.*]] = phi i1 [ false, %[[LOOP_BODY]] ], [ true, %[[LOOP]] ] +; OPT-NEXT: br i1 [[TMP1]], label %[[EXIT:.*]], label %[[LOOP]] +; OPT: [[EXIT]]: +; OPT-NEXT: ret void +; +entry: + br label %loop + +loop: ; preds = %loop_body, %entry + %i = phi i32 [ %n, %entry ], [ %i.next, %loop_body ] + %c = icmp ugt i32 %i, 0 + br i1 %c, label %loop_body, label %exit, !prof !0 + +loop_body: ; preds = %loop + %i.next = sub i32 %i, 1 + br label %loop + +exit: ; preds = %loop + ret void +} + +attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } + +!0 = !{!"branch_weights", i32 1000, i32 1} From 90c14748638f1e10e31173b145fdbb5c4529c922 Mon Sep 17 00:00:00 2001 From: Timothy Pearson <162513562+tpearson-ssc@users.noreply.github.com> Date: Wed, 25 Sep 2024 02:09:50 -0500 Subject: [PATCH 108/212] [SDAG] Honor signed arguments in floating point libcalls (#109134) In ExpandFPLibCall, an assumption is made that all floating point libcalls that take integer arguments use unsigned integers. In the case of ldexp and frexp, this assumption is incorrect, leading to miscompilation and subsequent target-dependent incorrect operation. Indicate that ldexp and frexp utilize signed arguments in ExpandFPLibCall. Fixes #108904 Signed-off-by: Timothy Pearson --- llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 3 +- llvm/test/CodeGen/PowerPC/ldexp-libcall.ll | 4 +- llvm/test/CodeGen/PowerPC/ldexp.ll | 36 ++++++---- .../PowerPC/negative-integer-fp-libcall.ll | 26 +++++++ .../X86/fold-int-pow2-with-fmul-or-fdiv.ll | 69 ++++++++++++------- 5 files changed, 96 insertions(+), 42 deletions(-) create mode 100644 llvm/test/CodeGen/PowerPC/negative-integer-fp-libcall.ll diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 3c087727a8012..04eb891f719d2 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -2205,7 +2205,8 @@ void SelectionDAGLegalize::ExpandFPLibCall(SDNode* Node, Results.push_back(Tmp.first); Results.push_back(Tmp.second); } else { - SDValue Tmp = ExpandLibCall(LC, Node, false).first; + bool IsSignedArgument = Node->getOpcode() == ISD::FLDEXP; + SDValue Tmp = ExpandLibCall(LC, Node, IsSignedArgument).first; Results.push_back(Tmp); } } diff --git a/llvm/test/CodeGen/PowerPC/ldexp-libcall.ll b/llvm/test/CodeGen/PowerPC/ldexp-libcall.ll index 6144a9d920365..e531516c37e87 100644 --- a/llvm/test/CodeGen/PowerPC/ldexp-libcall.ll +++ b/llvm/test/CodeGen/PowerPC/ldexp-libcall.ll @@ -10,7 +10,7 @@ define float @call_ldexpf(float %a, i32 %b) { ; CHECK-NEXT: std r0, 48(r1) ; CHECK-NEXT: .cfi_def_cfa_offset 32 ; CHECK-NEXT: .cfi_offset lr, 16 -; CHECK-NEXT: clrldi r4, r4, 32 +; CHECK-NEXT: extsw r4, r4 ; CHECK-NEXT: bl ldexpf ; CHECK-NEXT: nop ; CHECK-NEXT: addi r1, r1, 32 @@ -29,7 +29,7 @@ define double @call_ldexp(double %a, i32 %b) { ; CHECK-NEXT: std r0, 48(r1) ; CHECK-NEXT: .cfi_def_cfa_offset 32 ; CHECK-NEXT: .cfi_offset lr, 16 -; CHECK-NEXT: clrldi r4, r4, 32 +; CHECK-NEXT: extsw r4, r4 ; CHECK-NEXT: bl ldexp ; CHECK-NEXT: nop ; CHECK-NEXT: addi r1, r1, 32 diff --git a/llvm/test/CodeGen/PowerPC/ldexp.ll b/llvm/test/CodeGen/PowerPC/ldexp.ll index 151df6096b30b..ffc826cc86de5 100644 --- a/llvm/test/CodeGen/PowerPC/ldexp.ll +++ b/llvm/test/CodeGen/PowerPC/ldexp.ll @@ -57,22 +57,24 @@ define <2 x float> @ldexp_v2f32(<2 x float> %val, <2 x i32> %exp) { ; CHECK-NEXT: .cfi_offset 
v29, -48 ; CHECK-NEXT: .cfi_offset v30, -32 ; CHECK-NEXT: .cfi_offset v31, -16 -; CHECK-NEXT: xxsldwi vs0, v2, v2, 3 ; CHECK-NEXT: li r3, 0 +; CHECK-NEXT: xxsldwi vs0, v2, v2, 3 ; CHECK-NEXT: stxv v29, 32(r1) # 16-byte Folded Spill ; CHECK-NEXT: xscvspdpn f1, vs0 -; CHECK-NEXT: vextuwrx r4, r3, v3 +; CHECK-NEXT: vextuwrx r3, r3, v3 ; CHECK-NEXT: stxv v30, 48(r1) # 16-byte Folded Spill ; CHECK-NEXT: stxv v31, 64(r1) # 16-byte Folded Spill +; CHECK-NEXT: extsw r4, r3 ; CHECK-NEXT: vmr v31, v3 ; CHECK-NEXT: vmr v30, v2 ; CHECK-NEXT: bl ldexpf ; CHECK-NEXT: nop -; CHECK-NEXT: xxswapd vs0, v30 ; CHECK-NEXT: li r3, 4 +; CHECK-NEXT: xxswapd vs0, v30 ; CHECK-NEXT: xscvdpspn v29, f1 ; CHECK-NEXT: xscvspdpn f1, vs0 -; CHECK-NEXT: vextuwrx r4, r3, v31 +; CHECK-NEXT: vextuwrx r3, r3, v31 +; CHECK-NEXT: extsw r4, r3 ; CHECK-NEXT: bl ldexpf ; CHECK-NEXT: nop ; CHECK-NEXT: xscvdpspn vs0, f1 @@ -100,35 +102,39 @@ define <4 x float> @ldexp_v4f32(<4 x float> %val, <4 x i32> %exp) { ; CHECK-NEXT: .cfi_offset v29, -48 ; CHECK-NEXT: .cfi_offset v30, -32 ; CHECK-NEXT: .cfi_offset v31, -16 -; CHECK-NEXT: li r3, 12 -; CHECK-NEXT: xscvspdpn f1, v2 +; CHECK-NEXT: li r3, 4 +; CHECK-NEXT: xxswapd vs0, v2 ; CHECK-NEXT: stxv v28, 32(r1) # 16-byte Folded Spill +; CHECK-NEXT: xscvspdpn f1, vs0 +; CHECK-NEXT: vextuwrx r3, r3, v3 ; CHECK-NEXT: stxv v29, 48(r1) # 16-byte Folded Spill ; CHECK-NEXT: stxv v30, 64(r1) # 16-byte Folded Spill ; CHECK-NEXT: stxv v31, 80(r1) # 16-byte Folded Spill ; CHECK-NEXT: vmr v31, v3 +; CHECK-NEXT: extsw r4, r3 ; CHECK-NEXT: vmr v30, v2 -; CHECK-NEXT: vextuwrx r4, r3, v3 ; CHECK-NEXT: bl ldexpf ; CHECK-NEXT: nop -; CHECK-NEXT: xxswapd vs0, v30 -; CHECK-NEXT: li r3, 4 +; CHECK-NEXT: li r3, 12 ; CHECK-NEXT: xscpsgndp v29, f1, f1 -; CHECK-NEXT: xscvspdpn f1, vs0 -; CHECK-NEXT: vextuwrx r4, r3, v31 +; CHECK-NEXT: xscvspdpn f1, v30 +; CHECK-NEXT: vextuwrx r3, r3, v31 +; CHECK-NEXT: extsw r4, r3 ; CHECK-NEXT: bl ldexpf ; CHECK-NEXT: nop -; CHECK-NEXT: xxmrghd vs0, v29, vs1 +; CHECK-NEXT: xxmrghd vs0, vs1, v29 ; CHECK-NEXT: li r3, 0 -; CHECK-NEXT: vextuwrx r4, r3, v31 +; CHECK-NEXT: vextuwrx r3, r3, v31 ; CHECK-NEXT: xvcvdpsp v28, vs0 ; CHECK-NEXT: xxsldwi vs0, v30, v30, 3 +; CHECK-NEXT: extsw r4, r3 ; CHECK-NEXT: xscvspdpn f1, vs0 ; CHECK-NEXT: bl ldexpf ; CHECK-NEXT: nop ; CHECK-NEXT: xxsldwi vs0, v30, v30, 1 +; CHECK-NEXT: mfvsrwz r3, v31 ; CHECK-NEXT: xscpsgndp v29, f1, f1 -; CHECK-NEXT: mfvsrwz r4, v31 +; CHECK-NEXT: extsw r4, r3 ; CHECK-NEXT: xscvspdpn f1, vs0 ; CHECK-NEXT: bl ldexpf ; CHECK-NEXT: nop @@ -156,7 +162,7 @@ define half @ldexp_f16(half %arg0, i32 %arg1) { ; CHECK-NEXT: .cfi_def_cfa_offset 32 ; CHECK-NEXT: .cfi_offset lr, 16 ; CHECK-NEXT: xscvdphp f0, f1 -; CHECK-NEXT: clrldi r4, r4, 32 +; CHECK-NEXT: extsw r4, r4 ; CHECK-NEXT: mffprwz r3, f0 ; CHECK-NEXT: clrlwi r3, r3, 16 ; CHECK-NEXT: mtfprwz f0, r3 diff --git a/llvm/test/CodeGen/PowerPC/negative-integer-fp-libcall.ll b/llvm/test/CodeGen/PowerPC/negative-integer-fp-libcall.ll new file mode 100644 index 0000000000000..010ee6ef043e7 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/negative-integer-fp-libcall.ll @@ -0,0 +1,26 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 +; RUN: llc -O1 -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 < %s | FileCheck %s + +; Test that a negative parameter smaller than 64 bits (e.g., int) +; is correctly implemented with sign-extension when passed to +; a floating point libcall. 
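+;
+; Reader's note (an illustrative aside, not generated by the test updater):
+; the libcall prototype assumed here is the standard C one,
+; double ldexp(double x, int exp), whose exponent is a *signed* int. A 32-bit
+; exponent must therefore reach the callee sign-extended to 64 bits, so the
+; CHECK lines below expect the sign-extending load `lwa` (load word
+; algebraic) rather than the zero-extending `lwz`.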
+ +define double @ldexp_test(ptr %a, ptr %b) nounwind { +; CHECK-LABEL: ldexp_test: +; CHECK: # %bb.0: +; CHECK-NEXT: mflr 0 +; CHECK-NEXT: stdu 1, -112(1) +; CHECK-NEXT: std 0, 128(1) +; CHECK-NEXT: lfd 1, 0(3) +; CHECK-NEXT: lwa 4, 0(4) +; CHECK-NEXT: bl ldexp +; CHECK-NEXT: nop +; CHECK-NEXT: addi 1, 1, 112 +; CHECK-NEXT: ld 0, 16(1) +; CHECK-NEXT: mtlr 0 +; CHECK-NEXT: blr + %base = load double, ptr %a + %exp = load i32, ptr %b + %call = call double @llvm.ldexp.f64.i32(double %base, i32 signext %exp) + ret double %call +} diff --git a/llvm/test/CodeGen/X86/fold-int-pow2-with-fmul-or-fdiv.ll b/llvm/test/CodeGen/X86/fold-int-pow2-with-fmul-or-fdiv.ll index 5a051a9c499e4..332fbf7188af8 100644 --- a/llvm/test/CodeGen/X86/fold-int-pow2-with-fmul-or-fdiv.ll +++ b/llvm/test/CodeGen/X86/fold-int-pow2-with-fmul-or-fdiv.ll @@ -406,13 +406,15 @@ define <8 x half> @fmul_pow2_ldexp_8xhalf(<8 x i16> %i) { ; CHECK-SSE-NEXT: subq $72, %rsp ; CHECK-SSE-NEXT: .cfi_def_cfa_offset 80 ; CHECK-SSE-NEXT: movdqa %xmm0, (%rsp) # 16-byte Spill -; CHECK-SSE-NEXT: pextrw $7, %xmm0, %edi +; CHECK-SSE-NEXT: pextrw $7, %xmm0, %eax +; CHECK-SSE-NEXT: movswl %ax, %edi ; CHECK-SSE-NEXT: movss {{.*#+}} xmm0 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0] ; CHECK-SSE-NEXT: callq ldexpf@PLT ; CHECK-SSE-NEXT: callq __truncsfhf2@PLT ; CHECK-SSE-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; CHECK-SSE-NEXT: movdqa (%rsp), %xmm0 # 16-byte Reload -; CHECK-SSE-NEXT: pextrw $6, %xmm0, %edi +; CHECK-SSE-NEXT: pextrw $6, %xmm0, %eax +; CHECK-SSE-NEXT: movswl %ax, %edi ; CHECK-SSE-NEXT: movd {{.*#+}} xmm0 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0] ; CHECK-SSE-NEXT: callq ldexpf@PLT ; CHECK-SSE-NEXT: callq __truncsfhf2@PLT @@ -420,13 +422,15 @@ define <8 x half> @fmul_pow2_ldexp_8xhalf(<8 x i16> %i) { ; CHECK-SSE-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] ; CHECK-SSE-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; CHECK-SSE-NEXT: movdqa (%rsp), %xmm0 # 16-byte Reload -; CHECK-SSE-NEXT: pextrw $5, %xmm0, %edi +; CHECK-SSE-NEXT: pextrw $5, %xmm0, %eax +; CHECK-SSE-NEXT: movswl %ax, %edi ; CHECK-SSE-NEXT: movss {{.*#+}} xmm0 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0] ; CHECK-SSE-NEXT: callq ldexpf@PLT ; CHECK-SSE-NEXT: callq __truncsfhf2@PLT ; CHECK-SSE-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; CHECK-SSE-NEXT: movdqa (%rsp), %xmm0 # 16-byte Reload -; CHECK-SSE-NEXT: pextrw $4, %xmm0, %edi +; CHECK-SSE-NEXT: pextrw $4, %xmm0, %eax +; CHECK-SSE-NEXT: movswl %ax, %edi ; CHECK-SSE-NEXT: movd {{.*#+}} xmm0 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0] ; CHECK-SSE-NEXT: callq ldexpf@PLT ; CHECK-SSE-NEXT: callq __truncsfhf2@PLT @@ -436,13 +440,15 @@ define <8 x half> @fmul_pow2_ldexp_8xhalf(<8 x i16> %i) { ; CHECK-SSE-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] ; CHECK-SSE-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; CHECK-SSE-NEXT: movdqa (%rsp), %xmm0 # 16-byte Reload -; CHECK-SSE-NEXT: pextrw $3, %xmm0, %edi +; CHECK-SSE-NEXT: pextrw $3, %xmm0, %eax +; CHECK-SSE-NEXT: movswl %ax, %edi ; CHECK-SSE-NEXT: movss {{.*#+}} xmm0 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0] ; CHECK-SSE-NEXT: callq ldexpf@PLT ; CHECK-SSE-NEXT: callq __truncsfhf2@PLT ; CHECK-SSE-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; CHECK-SSE-NEXT: movdqa (%rsp), %xmm0 # 16-byte Reload -; CHECK-SSE-NEXT: pextrw $2, %xmm0, %edi +; CHECK-SSE-NEXT: pextrw $2, %xmm0, %eax +; CHECK-SSE-NEXT: movswl %ax, %edi ; CHECK-SSE-NEXT: movd {{.*#+}} xmm0 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0] ; CHECK-SSE-NEXT: callq ldexpf@PLT ; 
CHECK-SSE-NEXT: callq __truncsfhf2@PLT @@ -450,14 +456,15 @@ define <8 x half> @fmul_pow2_ldexp_8xhalf(<8 x i16> %i) { ; CHECK-SSE-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] ; CHECK-SSE-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; CHECK-SSE-NEXT: movdqa (%rsp), %xmm0 # 16-byte Reload -; CHECK-SSE-NEXT: pextrw $1, %xmm0, %edi +; CHECK-SSE-NEXT: pextrw $1, %xmm0, %eax +; CHECK-SSE-NEXT: movswl %ax, %edi ; CHECK-SSE-NEXT: movss {{.*#+}} xmm0 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0] ; CHECK-SSE-NEXT: callq ldexpf@PLT ; CHECK-SSE-NEXT: callq __truncsfhf2@PLT ; CHECK-SSE-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; CHECK-SSE-NEXT: movdqa (%rsp), %xmm0 # 16-byte Reload ; CHECK-SSE-NEXT: movd %xmm0, %eax -; CHECK-SSE-NEXT: movzwl %ax, %edi +; CHECK-SSE-NEXT: movswl %ax, %edi ; CHECK-SSE-NEXT: movd {{.*#+}} xmm0 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0] ; CHECK-SSE-NEXT: callq ldexpf@PLT ; CHECK-SSE-NEXT: callq __truncsfhf2@PLT @@ -476,13 +483,15 @@ define <8 x half> @fmul_pow2_ldexp_8xhalf(<8 x i16> %i) { ; CHECK-AVX2-NEXT: subq $72, %rsp ; CHECK-AVX2-NEXT: .cfi_def_cfa_offset 80 ; CHECK-AVX2-NEXT: vmovdqa %xmm0, (%rsp) # 16-byte Spill -; CHECK-AVX2-NEXT: vpextrw $7, %xmm0, %edi +; CHECK-AVX2-NEXT: vpextrw $7, %xmm0, %eax +; CHECK-AVX2-NEXT: movswl %ax, %edi ; CHECK-AVX2-NEXT: vmovss {{.*#+}} xmm0 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0] ; CHECK-AVX2-NEXT: callq ldexpf@PLT ; CHECK-AVX2-NEXT: callq __truncsfhf2@PLT ; CHECK-AVX2-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; CHECK-AVX2-NEXT: vmovdqa (%rsp), %xmm0 # 16-byte Reload -; CHECK-AVX2-NEXT: vpextrw $6, %xmm0, %edi +; CHECK-AVX2-NEXT: vpextrw $6, %xmm0, %eax +; CHECK-AVX2-NEXT: movswl %ax, %edi ; CHECK-AVX2-NEXT: vmovd {{.*#+}} xmm0 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0] ; CHECK-AVX2-NEXT: callq ldexpf@PLT ; CHECK-AVX2-NEXT: callq __truncsfhf2@PLT @@ -490,13 +499,15 @@ define <8 x half> @fmul_pow2_ldexp_8xhalf(<8 x i16> %i) { ; CHECK-AVX2-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] ; CHECK-AVX2-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; CHECK-AVX2-NEXT: vmovdqa (%rsp), %xmm0 # 16-byte Reload -; CHECK-AVX2-NEXT: vpextrw $5, %xmm0, %edi +; CHECK-AVX2-NEXT: vpextrw $5, %xmm0, %eax +; CHECK-AVX2-NEXT: movswl %ax, %edi ; CHECK-AVX2-NEXT: vmovss {{.*#+}} xmm0 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0] ; CHECK-AVX2-NEXT: callq ldexpf@PLT ; CHECK-AVX2-NEXT: callq __truncsfhf2@PLT ; CHECK-AVX2-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; CHECK-AVX2-NEXT: vmovdqa (%rsp), %xmm0 # 16-byte Reload -; CHECK-AVX2-NEXT: vpextrw $4, %xmm0, %edi +; CHECK-AVX2-NEXT: vpextrw $4, %xmm0, %eax +; CHECK-AVX2-NEXT: movswl %ax, %edi ; CHECK-AVX2-NEXT: vmovd {{.*#+}} xmm0 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0] ; CHECK-AVX2-NEXT: callq ldexpf@PLT ; CHECK-AVX2-NEXT: callq __truncsfhf2@PLT @@ -506,13 +517,15 @@ define <8 x half> @fmul_pow2_ldexp_8xhalf(<8 x i16> %i) { ; CHECK-AVX2-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] ; CHECK-AVX2-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; CHECK-AVX2-NEXT: vmovdqa (%rsp), %xmm0 # 16-byte Reload -; CHECK-AVX2-NEXT: vpextrw $3, %xmm0, %edi +; CHECK-AVX2-NEXT: vpextrw $3, %xmm0, %eax +; CHECK-AVX2-NEXT: movswl %ax, %edi ; CHECK-AVX2-NEXT: vmovss {{.*#+}} xmm0 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0] ; CHECK-AVX2-NEXT: callq ldexpf@PLT ; CHECK-AVX2-NEXT: callq __truncsfhf2@PLT ; CHECK-AVX2-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; CHECK-AVX2-NEXT: vmovdqa (%rsp), %xmm0 # 16-byte Reload 
-; CHECK-AVX2-NEXT: vpextrw $2, %xmm0, %edi +; CHECK-AVX2-NEXT: vpextrw $2, %xmm0, %eax +; CHECK-AVX2-NEXT: movswl %ax, %edi ; CHECK-AVX2-NEXT: vmovd {{.*#+}} xmm0 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0] ; CHECK-AVX2-NEXT: callq ldexpf@PLT ; CHECK-AVX2-NEXT: callq __truncsfhf2@PLT @@ -520,14 +533,15 @@ define <8 x half> @fmul_pow2_ldexp_8xhalf(<8 x i16> %i) { ; CHECK-AVX2-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] ; CHECK-AVX2-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; CHECK-AVX2-NEXT: vmovdqa (%rsp), %xmm0 # 16-byte Reload -; CHECK-AVX2-NEXT: vpextrw $1, %xmm0, %edi +; CHECK-AVX2-NEXT: vpextrw $1, %xmm0, %eax +; CHECK-AVX2-NEXT: movswl %ax, %edi ; CHECK-AVX2-NEXT: vmovss {{.*#+}} xmm0 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0] ; CHECK-AVX2-NEXT: callq ldexpf@PLT ; CHECK-AVX2-NEXT: callq __truncsfhf2@PLT ; CHECK-AVX2-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; CHECK-AVX2-NEXT: vmovdqa (%rsp), %xmm0 # 16-byte Reload ; CHECK-AVX2-NEXT: vmovd %xmm0, %eax -; CHECK-AVX2-NEXT: movzwl %ax, %edi +; CHECK-AVX2-NEXT: movswl %ax, %edi ; CHECK-AVX2-NEXT: vmovd {{.*#+}} xmm0 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0] ; CHECK-AVX2-NEXT: callq ldexpf@PLT ; CHECK-AVX2-NEXT: callq __truncsfhf2@PLT @@ -546,7 +560,8 @@ define <8 x half> @fmul_pow2_ldexp_8xhalf(<8 x i16> %i) { ; CHECK-AVX512F-NEXT: subq $72, %rsp ; CHECK-AVX512F-NEXT: .cfi_def_cfa_offset 80 ; CHECK-AVX512F-NEXT: vmovdqa %xmm0, (%rsp) # 16-byte Spill -; CHECK-AVX512F-NEXT: vpextrw $7, %xmm0, %edi +; CHECK-AVX512F-NEXT: vpextrw $7, %xmm0, %eax +; CHECK-AVX512F-NEXT: movswl %ax, %edi ; CHECK-AVX512F-NEXT: vmovss {{.*#+}} xmm0 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0] ; CHECK-AVX512F-NEXT: callq ldexpf@PLT ; CHECK-AVX512F-NEXT: vcvtps2ph $4, %xmm0, %xmm0 @@ -554,7 +569,8 @@ define <8 x half> @fmul_pow2_ldexp_8xhalf(<8 x i16> %i) { ; CHECK-AVX512F-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0 ; CHECK-AVX512F-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; CHECK-AVX512F-NEXT: vmovdqa (%rsp), %xmm0 # 16-byte Reload -; CHECK-AVX512F-NEXT: vpextrw $6, %xmm0, %edi +; CHECK-AVX512F-NEXT: vpextrw $6, %xmm0, %eax +; CHECK-AVX512F-NEXT: movswl %ax, %edi ; CHECK-AVX512F-NEXT: vmovss {{.*#+}} xmm0 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0] ; CHECK-AVX512F-NEXT: callq ldexpf@PLT ; CHECK-AVX512F-NEXT: vcvtps2ph $4, %xmm0, %xmm0 @@ -564,7 +580,8 @@ define <8 x half> @fmul_pow2_ldexp_8xhalf(<8 x i16> %i) { ; CHECK-AVX512F-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] ; CHECK-AVX512F-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; CHECK-AVX512F-NEXT: vmovdqa (%rsp), %xmm0 # 16-byte Reload -; CHECK-AVX512F-NEXT: vpextrw $5, %xmm0, %edi +; CHECK-AVX512F-NEXT: vpextrw $5, %xmm0, %eax +; CHECK-AVX512F-NEXT: movswl %ax, %edi ; CHECK-AVX512F-NEXT: vmovss {{.*#+}} xmm0 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0] ; CHECK-AVX512F-NEXT: callq ldexpf@PLT ; CHECK-AVX512F-NEXT: vcvtps2ph $4, %xmm0, %xmm0 @@ -572,7 +589,8 @@ define <8 x half> @fmul_pow2_ldexp_8xhalf(<8 x i16> %i) { ; CHECK-AVX512F-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0 ; CHECK-AVX512F-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; CHECK-AVX512F-NEXT: vmovdqa (%rsp), %xmm0 # 16-byte Reload -; CHECK-AVX512F-NEXT: vpextrw $4, %xmm0, %edi +; CHECK-AVX512F-NEXT: vpextrw $4, %xmm0, %eax +; CHECK-AVX512F-NEXT: movswl %ax, %edi ; CHECK-AVX512F-NEXT: vmovss {{.*#+}} xmm0 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0] ; CHECK-AVX512F-NEXT: callq ldexpf@PLT ; CHECK-AVX512F-NEXT: vcvtps2ph $4, %xmm0, %xmm0 @@ -584,7 +602,8 @@ define <8 x half> 
@fmul_pow2_ldexp_8xhalf(<8 x i16> %i) { ; CHECK-AVX512F-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] ; CHECK-AVX512F-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; CHECK-AVX512F-NEXT: vmovdqa (%rsp), %xmm0 # 16-byte Reload -; CHECK-AVX512F-NEXT: vpextrw $3, %xmm0, %edi +; CHECK-AVX512F-NEXT: vpextrw $3, %xmm0, %eax +; CHECK-AVX512F-NEXT: movswl %ax, %edi ; CHECK-AVX512F-NEXT: vmovss {{.*#+}} xmm0 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0] ; CHECK-AVX512F-NEXT: callq ldexpf@PLT ; CHECK-AVX512F-NEXT: vcvtps2ph $4, %xmm0, %xmm0 @@ -592,7 +611,8 @@ define <8 x half> @fmul_pow2_ldexp_8xhalf(<8 x i16> %i) { ; CHECK-AVX512F-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0 ; CHECK-AVX512F-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; CHECK-AVX512F-NEXT: vmovdqa (%rsp), %xmm0 # 16-byte Reload -; CHECK-AVX512F-NEXT: vpextrw $2, %xmm0, %edi +; CHECK-AVX512F-NEXT: vpextrw $2, %xmm0, %eax +; CHECK-AVX512F-NEXT: movswl %ax, %edi ; CHECK-AVX512F-NEXT: vmovss {{.*#+}} xmm0 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0] ; CHECK-AVX512F-NEXT: callq ldexpf@PLT ; CHECK-AVX512F-NEXT: vcvtps2ph $4, %xmm0, %xmm0 @@ -602,7 +622,8 @@ define <8 x half> @fmul_pow2_ldexp_8xhalf(<8 x i16> %i) { ; CHECK-AVX512F-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] ; CHECK-AVX512F-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; CHECK-AVX512F-NEXT: vmovdqa (%rsp), %xmm0 # 16-byte Reload -; CHECK-AVX512F-NEXT: vpextrw $1, %xmm0, %edi +; CHECK-AVX512F-NEXT: vpextrw $1, %xmm0, %eax +; CHECK-AVX512F-NEXT: movswl %ax, %edi ; CHECK-AVX512F-NEXT: vmovss {{.*#+}} xmm0 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0] ; CHECK-AVX512F-NEXT: callq ldexpf@PLT ; CHECK-AVX512F-NEXT: vcvtps2ph $4, %xmm0, %xmm0 @@ -611,7 +632,7 @@ define <8 x half> @fmul_pow2_ldexp_8xhalf(<8 x i16> %i) { ; CHECK-AVX512F-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; CHECK-AVX512F-NEXT: vmovdqa (%rsp), %xmm0 # 16-byte Reload ; CHECK-AVX512F-NEXT: vmovd %xmm0, %eax -; CHECK-AVX512F-NEXT: movzwl %ax, %edi +; CHECK-AVX512F-NEXT: movswl %ax, %edi ; CHECK-AVX512F-NEXT: vmovss {{.*#+}} xmm0 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0] ; CHECK-AVX512F-NEXT: callq ldexpf@PLT ; CHECK-AVX512F-NEXT: vcvtps2ph $4, %xmm0, %xmm0 From ae7b454f98ba002ba1cc4cf19fac55f629cfc52f Mon Sep 17 00:00:00 2001 From: Matthias Springer Date: Wed, 25 Sep 2024 09:17:49 +0200 Subject: [PATCH 109/212] Revert "[MLIR] Make `OneShotModuleBufferize` use `OpInterface`" (#109919) Reverts llvm/llvm-project#107295 This commit breaks an integration test: ``` build/bin/mlir-opt mlir/test/Integration/Dialect/Complex/CPU/correctness.mlir -one-shot-bufferize="bufferize-function-boundaries" ``` --- .../IR/BufferizableOpInterface.h | 7 +- .../FuncBufferizableOpInterfaceImpl.h | 12 +- .../IR/BufferizableOpInterface.cpp | 5 +- .../FuncBufferizableOpInterfaceImpl.cpp | 2 +- .../Transforms/OneShotModuleBufferize.cpp | 111 ++++++------- .../Transforms/transform-ops.mlir | 142 ++++++++--------- mlir/test/Dialect/LLVM/transform-e2e.mlir | 22 ++- .../Linalg/matmul-shared-memory-padding.mlir | 52 +++--- .../Linalg/pad-to-specific-memory-space.mlir | 148 +++++++++--------- .../test/Dialect/Vector/transform-vector.mlir | 84 +++++----- mlir/test/Examples/transform/ChH/full.mlir | 11 +- 11 files changed, 281 insertions(+), 315 deletions(-) diff --git a/mlir/include/mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h b/mlir/include/mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h index d19687ec9afee..aceb9d059b95f 100644 --- 
a/mlir/include/mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h +++ b/mlir/include/mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h @@ -11,7 +11,6 @@ #include "mlir/IR/Operation.h" #include "mlir/IR/PatternMatch.h" -#include "mlir/Interfaces/FunctionInterfaces.h" #include "mlir/Support/LLVM.h" #include "llvm/ADT/DenseMapInfoVariant.h" #include "llvm/ADT/SetVector.h" @@ -261,9 +260,9 @@ struct BufferizationOptions { using AnalysisStateInitFn = std::function<void(AnalysisState &)>; /// Tensor -> MemRef type converter. /// Parameters: Value, memory space, func op, bufferization options - using FunctionArgTypeConverterFn = std::function<BaseMemRefType( - TensorType, Attribute memorySpace, FunctionOpInterface, - const BufferizationOptions &)>; + using FunctionArgTypeConverterFn = + std::function<BaseMemRefType(TensorType, Attribute memorySpace, + func::FuncOp, const BufferizationOptions &)>; /// Tensor -> MemRef type converter. /// Parameters: Value, memory space, bufferization options using UnknownTypeConverterFn = std::function<BaseMemRefType( Value, Attribute memorySpace, const BufferizationOptions &)>; diff --git a/mlir/include/mlir/Dialect/Bufferization/Transforms/FuncBufferizableOpInterfaceImpl.h b/mlir/include/mlir/Dialect/Bufferization/Transforms/FuncBufferizableOpInterfaceImpl.h --- a/mlir/include/mlir/Dialect/Bufferization/Transforms/FuncBufferizableOpInterfaceImpl.h +++ b/mlir/include/mlir/Dialect/Bufferization/Transforms/FuncBufferizableOpInterfaceImpl.h /// A mapping of ReturnOp OpOperand indices to equivalent FuncOp BBArg /// indices. - DenseMap<FunctionOpInterface, IndexMapping> equivalentFuncArgs; + DenseMap<FuncOp, IndexMapping> equivalentFuncArgs; /// A mapping of FuncOp BBArg indices to aliasing ReturnOp OpOperand indices. - DenseMap<FunctionOpInterface, IndexToIndexListMapping> aliasingReturnVals; + DenseMap<FuncOp, IndexToIndexListMapping> aliasingReturnVals; /// A set of all read BlockArguments of FuncOps. - DenseMap<FunctionOpInterface, BbArgIndexSet> readBbArgs; + DenseMap<FuncOp, BbArgIndexSet> readBbArgs; /// A set of all written-to BlockArguments of FuncOps. - DenseMap<FunctionOpInterface, BbArgIndexSet> writtenBbArgs; + DenseMap<FuncOp, BbArgIndexSet> writtenBbArgs; /// Keep track of which FuncOps are fully analyzed or currently being /// analyzed. - DenseMap<FunctionOpInterface, FuncOpAnalysisState> analyzedFuncOps; + DenseMap<FuncOp, FuncOpAnalysisState> analyzedFuncOps; /// This function is called right before analyzing the given FuncOp. It /// initializes the data structures for the FuncOp in this state object. - void startFunctionAnalysis(FunctionOpInterface funcOp); + void startFunctionAnalysis(FuncOp funcOp); }; void registerBufferizableOpInterfaceExternalModels(DialectRegistry &registry); diff --git a/mlir/lib/Dialect/Bufferization/IR/BufferizableOpInterface.cpp b/mlir/lib/Dialect/Bufferization/IR/BufferizableOpInterface.cpp index 92f757111cbaf..85604eef2f283 100644 --- a/mlir/lib/Dialect/Bufferization/IR/BufferizableOpInterface.cpp +++ b/mlir/lib/Dialect/Bufferization/IR/BufferizableOpInterface.cpp @@ -18,7 +18,6 @@ #include "mlir/IR/TypeUtilities.h" #include "mlir/IR/Value.h" #include "mlir/Interfaces/ControlFlowInterfaces.h" -#include "mlir/Interfaces/FunctionInterfaces.h" #include "llvm/ADT/ScopeExit.h" #include "llvm/Support/Debug.h" @@ -315,7 +314,7 @@ namespace { /// Default function arg type converter: Use a fully dynamic layout map.
BaseMemRefType defaultFunctionArgTypeConverter(TensorType type, Attribute memorySpace, - FunctionOpInterface funcOp, + func::FuncOp funcOp, const BufferizationOptions &options) { return getMemRefTypeWithFullyDynamicLayout(type, memorySpace); } @@ -362,7 +361,7 @@ BufferizationOptions::dynCastBufferizableOp(Value value) const { void BufferizationOptions::setFunctionBoundaryTypeConversion( LayoutMapOption layoutMapOption) { functionArgTypeConverterFn = [=](TensorType tensorType, Attribute memorySpace, - FunctionOpInterface funcOp, + func::FuncOp funcOp, const BufferizationOptions &options) { if (layoutMapOption == LayoutMapOption::IdentityLayoutMap) return bufferization::getMemRefTypeWithStaticIdentityLayout(tensorType, diff --git a/mlir/lib/Dialect/Bufferization/Transforms/FuncBufferizableOpInterfaceImpl.cpp b/mlir/lib/Dialect/Bufferization/Transforms/FuncBufferizableOpInterfaceImpl.cpp index 9749a71f3514b..9fbe574ec392d 100644 --- a/mlir/lib/Dialect/Bufferization/Transforms/FuncBufferizableOpInterfaceImpl.cpp +++ b/mlir/lib/Dialect/Bufferization/Transforms/FuncBufferizableOpInterfaceImpl.cpp @@ -22,7 +22,7 @@ namespace mlir { namespace bufferization { namespace func_ext { -void FuncAnalysisState::startFunctionAnalysis(FunctionOpInterface funcOp) { +void FuncAnalysisState::startFunctionAnalysis(FuncOp funcOp) { analyzedFuncOps[funcOp] = FuncOpAnalysisState::InProgress; auto createdEquiv = equivalentFuncArgs.try_emplace(funcOp, IndexMapping()); auto createdAliasingResults = diff --git a/mlir/lib/Dialect/Bufferization/Transforms/OneShotModuleBufferize.cpp b/mlir/lib/Dialect/Bufferization/Transforms/OneShotModuleBufferize.cpp index f934ae88277b6..0a4072605c265 100644 --- a/mlir/lib/Dialect/Bufferization/Transforms/OneShotModuleBufferize.cpp +++ b/mlir/lib/Dialect/Bufferization/Transforms/OneShotModuleBufferize.cpp @@ -75,7 +75,7 @@ using namespace mlir::bufferization; using namespace mlir::bufferization::func_ext; /// A mapping of FuncOps to their callers. -using FuncCallerMap = DenseMap<FunctionOpInterface, DenseSet<Operation *>>; +using FuncCallerMap = DenseMap<func::FuncOp, DenseSet<Operation *>>; /// Get or create FuncAnalysisState. static FuncAnalysisState & getOrCreateFuncAnalysisState(OneShotAnalysisState &state) { @@ -88,11 +88,10 @@ getOrCreateFuncAnalysisState(OneShotAnalysisState &state) { /// Return the unique ReturnOp that terminates `funcOp`. /// Return nullptr if there is no such unique ReturnOp. -static Operation *getAssumedUniqueReturnOp(FunctionOpInterface funcOp) { - Operation *returnOp = nullptr; - for (Block &b : funcOp.getFunctionBody()) { - auto candidateOp = b.getTerminator(); - if (candidateOp && candidateOp->hasTrait<OpTrait::ReturnLike>()) { +static func::ReturnOp getAssumedUniqueReturnOp(func::FuncOp funcOp) { + func::ReturnOp returnOp; + for (Block &b : funcOp.getBody()) { + if (auto candidateOp = dyn_cast<func::ReturnOp>(b.getTerminator())) { if (returnOp) return nullptr; returnOp = candidateOp; @@ -127,16 +126,16 @@ static void annotateEquivalentReturnBbArg(OpOperand &returnVal, /// Store function BlockArguments that are equivalent to/aliasing a returned /// value in FuncAnalysisState. static LogicalResult -aliasingFuncOpBBArgsAnalysis(FunctionOpInterface funcOp, - OneShotAnalysisState &state, +aliasingFuncOpBBArgsAnalysis(FuncOp funcOp, OneShotAnalysisState &state, FuncAnalysisState &funcState) { - if (funcOp.getFunctionBody().empty()) { + if (funcOp.getBody().empty()) { // No function body available. Conservatively assume that every tensor // return value may alias with any tensor bbArg.
- for (const auto &inputIt : llvm::enumerate(funcOp.getArgumentTypes())) { + FunctionType type = funcOp.getFunctionType(); + for (const auto &inputIt : llvm::enumerate(type.getInputs())) { if (!isa<TensorType>(inputIt.value())) continue; - for (const auto &resultIt : llvm::enumerate(funcOp.getResultTypes())) { + for (const auto &resultIt : llvm::enumerate(type.getResults())) { if (!isa<TensorType>(resultIt.value())) continue; int64_t returnIdx = resultIt.index(); @@ -148,7 +147,7 @@ aliasingFuncOpBBArgsAnalysis(FunctionOpInterface funcOp, } // Support only single return-terminated block in the function. - Operation *returnOp = getAssumedUniqueReturnOp(funcOp); + func::ReturnOp returnOp = getAssumedUniqueReturnOp(funcOp); assert(returnOp && "expected func with single return op"); for (OpOperand &returnVal : returnOp->getOpOperands()) @@ -169,8 +168,8 @@ aliasingFuncOpBBArgsAnalysis(FunctionOpInterface funcOp, return success(); } -static void annotateFuncArgAccess(FunctionOpInterface funcOp, int64_t idx, - bool isRead, bool isWritten) { +static void annotateFuncArgAccess(func::FuncOp funcOp, int64_t idx, bool isRead, + bool isWritten) { OpBuilder b(funcOp.getContext()); Attribute accessType; if (isRead && isWritten) { @@ -190,12 +189,12 @@ static void annotateFuncArgAccess(FunctionOpInterface funcOp, int64_t idx, /// function with unknown ops, we conservatively assume that such ops bufferize /// to a read + write. static LogicalResult -funcOpBbArgReadWriteAnalysis(FunctionOpInterface funcOp, - OneShotAnalysisState &state, +funcOpBbArgReadWriteAnalysis(FuncOp funcOp, OneShotAnalysisState &state, FuncAnalysisState &funcState) { - for (int64_t idx = 0, e = funcOp.getNumArguments(); idx < e; ++idx) { + for (int64_t idx = 0, e = funcOp.getFunctionType().getNumInputs(); idx < e; + ++idx) { // Skip non-tensor arguments. - if (!isa<TensorType>(funcOp.getArgumentTypes()[idx])) + if (!isa<TensorType>(funcOp.getFunctionType().getInput(idx))) continue; bool isRead; bool isWritten; @@ -205,7 +204,7 @@ funcOpBbArgReadWriteAnalysis(FunctionOpInterface funcOp, StringRef str = accessAttr.getValue(); isRead = str == "read" || str == "read-write"; isWritten = str == "write" || str == "read-write"; - } else if (funcOp.getFunctionBody().empty()) { + } else if (funcOp.getBody().empty()) { // If the function has no body, conservatively assume that all args are // read + written. isRead = true; @@ -231,19 +230,20 @@ funcOpBbArgReadWriteAnalysis(FunctionOpInterface funcOp, /// Remove bufferization attributes on FuncOp arguments. static void removeBufferizationAttributes(BlockArgument bbArg) { - auto funcOp = cast<FunctionOpInterface>(bbArg.getOwner()->getParentOp()); + auto funcOp = cast<func::FuncOp>(bbArg.getOwner()->getParentOp()); funcOp.removeArgAttr(bbArg.getArgNumber(), BufferizationDialect::kBufferLayoutAttrName); funcOp.removeArgAttr(bbArg.getArgNumber(), BufferizationDialect::kWritableAttrName); } -static FunctionOpInterface getCalledFunction(CallOpInterface callOp) { +/// Return the func::FuncOp called by `callOp`. +static func::FuncOp getCalledFunction(func::CallOp callOp) { SymbolRefAttr sym = llvm::dyn_cast_if_present<SymbolRefAttr>(callOp.getCallableForCallee()); if (!sym) return nullptr; - return dyn_cast_or_null<FunctionOpInterface>( + return dyn_cast_or_null<func::FuncOp>( SymbolTable::lookupNearestSymbolFrom(callOp, sym)); } @@ -251,12 +251,12 @@ static FunctionOpInterface getCalledFunction(CallOpInterface callOp) { /// Note: This only adds new equivalence info if the called function was already /// analyzed. // TODO: This does not handle cyclic function call graphs etc.
-static void equivalenceAnalysis(FunctionOpInterface funcOp, +static void equivalenceAnalysis(func::FuncOp funcOp, OneShotAnalysisState &state, FuncAnalysisState &funcState) { - funcOp->walk([&](CallOpInterface callOp) { - FunctionOpInterface calledFunction = getCalledFunction(callOp); - assert(calledFunction && "could not retrieved called FunctionOpInterface"); + funcOp->walk([&](func::CallOp callOp) { + func::FuncOp calledFunction = getCalledFunction(callOp); + assert(calledFunction && "could not retrieved called func::FuncOp"); // No equivalence info available for the called function. if (!funcState.equivalentFuncArgs.count(calledFunction)) @@ -267,7 +267,7 @@ static void equivalenceAnalysis(FunctionOpInterface funcOp, int64_t bbargIdx = it.second; if (!state.isInPlace(callOp->getOpOperand(bbargIdx))) continue; - Value returnVal = callOp->getResult(returnIdx); + Value returnVal = callOp.getResult(returnIdx); Value argVal = callOp->getOperand(bbargIdx); state.unionEquivalenceClasses(returnVal, argVal); } @@ -277,9 +277,11 @@ static void equivalenceAnalysis(FunctionOpInterface funcOp, } /// Return "true" if the given function signature has tensor semantics. -static bool hasTensorSignature(FunctionOpInterface funcOp) { - return llvm::any_of(funcOp.getArgumentTypes(), llvm::IsaPred<TensorType>) || - llvm::any_of(funcOp.getResultTypes(), llvm::IsaPred<TensorType>); +static bool hasTensorSignature(func::FuncOp funcOp) { + return llvm::any_of(funcOp.getFunctionType().getInputs(), + llvm::IsaPred<TensorType>) || + llvm::any_of(funcOp.getFunctionType().getResults(), + llvm::IsaPred<TensorType>); } /// Store all functions of the `moduleOp` in `orderedFuncOps`, sorted by @@ -289,16 +291,16 @@ static bool hasTensorSignature(func::FuncOp funcOp) { /// retrieve the called FuncOp from any func::CallOp. static LogicalResult getFuncOpsOrderedByCalls(ModuleOp moduleOp, - SmallVectorImpl<FunctionOpInterface> &orderedFuncOps, + SmallVectorImpl<func::FuncOp> &orderedFuncOps, FuncCallerMap &callerMap) { // For each FuncOp, the set of functions called by it (i.e. the union of // symbols of all nested func::CallOp). - DenseMap<FunctionOpInterface, DenseSet<FunctionOpInterface>> calledBy; + DenseMap<func::FuncOp, DenseSet<func::FuncOp>> calledBy; // For each FuncOp, the number of func::CallOp it contains. - DenseMap<FunctionOpInterface, unsigned> numberCallOpsContainedInFuncOp; - WalkResult res = moduleOp.walk([&](FunctionOpInterface funcOp) -> WalkResult { - if (!funcOp.getFunctionBody().empty()) { - Operation *returnOp = getAssumedUniqueReturnOp(funcOp); + DenseMap<func::FuncOp, unsigned> numberCallOpsContainedInFuncOp; + WalkResult res = moduleOp.walk([&](func::FuncOp funcOp) -> WalkResult { + if (!funcOp.getBody().empty()) { + func::ReturnOp returnOp = getAssumedUniqueReturnOp(funcOp); if (!returnOp) return funcOp->emitError() << "cannot bufferize a FuncOp with tensors and " @@ -307,10 +309,9 @@ getFuncOpsOrderedByCalls(ModuleOp moduleOp, // Collect function calls and populate the caller map. numberCallOpsContainedInFuncOp[funcOp] = 0; - return funcOp.walk([&](CallOpInterface callOp) -> WalkResult { - FunctionOpInterface calledFunction = getCalledFunction(callOp); - assert(calledFunction && - "could not retrieved called FunctionOpInterface"); + return funcOp.walk([&](func::CallOp callOp) -> WalkResult { + func::FuncOp calledFunction = getCalledFunction(callOp); + assert(calledFunction && "could not retrieved called func::FuncOp"); // If the called function does not have any tensors in its signature, then // it is not necessary to bufferize the callee before the caller.
if (!hasTensorSignature(calledFunction)) @@ -348,11 +349,11 @@ getFuncOpsOrderedByCalls(ModuleOp moduleOp, /// most generic layout map as function return types. After bufferizing the /// entire function body, a more concise memref type can potentially be used for /// the return type of the function. -static void foldMemRefCasts(FunctionOpInterface funcOp) { - if (funcOp.getFunctionBody().empty()) +static void foldMemRefCasts(func::FuncOp funcOp) { + if (funcOp.getBody().empty()) return; - Operation *returnOp = getAssumedUniqueReturnOp(funcOp); + func::ReturnOp returnOp = getAssumedUniqueReturnOp(funcOp); SmallVector<Type> resultTypes; for (OpOperand &operand : returnOp->getOpOperands()) { @@ -364,8 +365,8 @@ static void foldMemRefCasts(FunctionOpInterface funcOp) { } } - auto newFuncType = FunctionType::get(funcOp.getContext(), - funcOp.getArgumentTypes(), resultTypes); + auto newFuncType = FunctionType::get( + funcOp.getContext(), funcOp.getFunctionType().getInputs(), resultTypes); funcOp.setType(newFuncType); } @@ -378,7 +379,7 @@ mlir::bufferization::analyzeModuleOp(ModuleOp moduleOp, FuncAnalysisState &funcState = getOrCreateFuncAnalysisState(state); // A list of functions in the order in which they are analyzed + bufferized. - SmallVector<FunctionOpInterface> orderedFuncOps; + SmallVector<func::FuncOp> orderedFuncOps; // A mapping of FuncOps to their callers. FuncCallerMap callerMap; @@ -387,7 +388,7 @@ mlir::bufferization::analyzeModuleOp(ModuleOp moduleOp, return failure(); // Analyze ops. - for (FunctionOpInterface funcOp : orderedFuncOps) { + for (func::FuncOp funcOp : orderedFuncOps) { if (!state.getOptions().isOpAllowed(funcOp)) continue; @@ -415,7 +416,7 @@ mlir::bufferization::analyzeModuleOp(ModuleOp moduleOp, void mlir::bufferization::removeBufferizationAttributesInModule( ModuleOp moduleOp) { - moduleOp.walk([&](FunctionOpInterface op) { + moduleOp.walk([&](func::FuncOp op) { for (BlockArgument bbArg : op.getArguments()) removeBufferizationAttributes(bbArg); }); @@ -429,7 +430,7 @@ LogicalResult mlir::bufferization::bufferizeModuleOp( IRRewriter rewriter(moduleOp.getContext()); // A list of functions in the order in which they are analyzed + bufferized. - SmallVector<FunctionOpInterface> orderedFuncOps; + SmallVector<func::FuncOp> orderedFuncOps; // A mapping of FuncOps to their callers. FuncCallerMap callerMap; @@ -438,11 +439,11 @@ LogicalResult mlir::bufferization::bufferizeModuleOp( return failure(); // Bufferize functions. - for (FunctionOpInterface funcOp : orderedFuncOps) { + for (func::FuncOp funcOp : orderedFuncOps) { // Note: It would be good to apply cleanups here but we cannot as aliasInfo // would be invalidated. - if (llvm::is_contained(options.noAnalysisFuncFilter, funcOp.getName())) { + if (llvm::is_contained(options.noAnalysisFuncFilter, funcOp.getSymName())) { // This function was not analyzed and RaW conflicts were not resolved. // Buffer copies must be inserted before every write. OneShotBufferizationOptions updatedOptions = options; @@ -462,7 +463,7 @@ LogicalResult mlir::bufferization::bufferizeModuleOp( // Bufferize all other ops. for (Operation &op : llvm::make_early_inc_range(moduleOp.getOps())) { // Functions were already bufferized. - if (isa<FunctionOpInterface>(&op)) + if (isa<func::FuncOp>(&op)) continue; if (failed(bufferizeOp(&op, options, statistics))) return failure(); @@ -489,12 +490,12 @@ LogicalResult mlir::bufferization::runOneShotModuleBufferize( // FuncOps whose names are specified in options.noAnalysisFuncFilter will // not be analyzed. Ops in these FuncOps will not be analyzed as well.
OpFilter::Entry::FilterFn analysisFilterFn = [=](Operation *op) { - auto func = dyn_cast(op); + auto func = dyn_cast(op); if (!func) - func = op->getParentOfType(); + func = op->getParentOfType(); if (func) return llvm::is_contained(options.noAnalysisFuncFilter, - func.getName()); + func.getSymName()); return false; }; OneShotBufferizationOptions updatedOptions(options); diff --git a/mlir/test/Dialect/Bufferization/Transforms/transform-ops.mlir b/mlir/test/Dialect/Bufferization/Transforms/transform-ops.mlir index 588aa8a85a84e..3c50a9e72d9d9 100644 --- a/mlir/test/Dialect/Bufferization/Transforms/transform-ops.mlir +++ b/mlir/test/Dialect/Bufferization/Transforms/transform-ops.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt --transform-interpreter="debug-payload-root-tag=payload" %s -split-input-file -verify-diagnostics | FileCheck %s +// RUN: mlir-opt --transform-interpreter %s -split-input-file -verify-diagnostics | FileCheck %s // Test One-Shot Bufferize. @@ -12,21 +12,19 @@ module attributes {transform.with_named_sequence} { // CHECK-LABEL: func @test_function( // CHECK-SAME: %[[A:.*]]: tensor -module @payload attributes { transform.target_tag = "payload" } { - func.func @test_function(%A : tensor, %v : vector<4xf32>) -> (tensor) { - %c0 = arith.constant 0 : index +func.func @test_function(%A : tensor, %v : vector<4xf32>) -> (tensor) { + %c0 = arith.constant 0 : index - // CHECK: %[[A_memref:.*]] = bufferization.to_memref %[[A]] - // CHECK: %[[dim:.*]] = memref.dim %[[A_memref]] - // CHECK: %[[alloc:.*]] = memref.alloc(%[[dim]]) - // CHECK: memref.copy %[[A_memref]], %[[alloc]] - // CHECK: vector.transfer_write %{{.*}}, %[[alloc]] - // CHECK: %[[res_tensor:.*]] = bufferization.to_tensor %[[alloc]] - %0 = vector.transfer_write %v, %A[%c0] : vector<4xf32>, tensor + // CHECK: %[[A_memref:.*]] = bufferization.to_memref %[[A]] + // CHECK: %[[dim:.*]] = memref.dim %[[A_memref]] + // CHECK: %[[alloc:.*]] = memref.alloc(%[[dim]]) + // CHECK: memref.copy %[[A_memref]], %[[alloc]] + // CHECK: vector.transfer_write %{{.*}}, %[[alloc]] + // CHECK: %[[res_tensor:.*]] = bufferization.to_tensor %[[alloc]] + %0 = vector.transfer_write %v, %A[%c0] : vector<4xf32>, tensor - // CHECK: return %[[res_tensor]] - return %0 : tensor - } + // CHECK: return %[[res_tensor]] + return %0 : tensor } // ----- @@ -44,21 +42,19 @@ module attributes {transform.with_named_sequence} { // CHECK-LABEL: func @test_function( // CHECK-SAME: %[[A:.*]]: tensor // CHECK-NOT: memref.copy -module @payload attributes { transform.target_tag = "payload" } { - func.func @test_function(%A : tensor, %v : vector<4xf32>) -> (tensor) { - %c0 = arith.constant 0 : index +func.func @test_function(%A : tensor, %v : vector<4xf32>) -> (tensor) { + %c0 = arith.constant 0 : index - // CHECK: %[[A_memref:.*]] = bufferization.to_memref %[[A]] - // CHECK: %[[dim:.*]] = memref.dim %[[A_memref]] - // CHECK: %[[alloc:.*]] = memref.alloc(%[[dim]]) - // CHECK: linalg.copy ins(%[[A_memref]] : memref<{{.*}}>) outs(%[[alloc]] - // CHECK: vector.transfer_write %{{.*}}, %[[alloc]] - // CHECK: %[[res_tensor:.*]] = bufferization.to_tensor %[[alloc]] - %0 = vector.transfer_write %v, %A[%c0] : vector<4xf32>, tensor + // CHECK: %[[A_memref:.*]] = bufferization.to_memref %[[A]] + // CHECK: %[[dim:.*]] = memref.dim %[[A_memref]] + // CHECK: %[[alloc:.*]] = memref.alloc(%[[dim]]) + // CHECK: linalg.copy ins(%[[A_memref]] : memref<{{.*}}>) outs(%[[alloc]] + // CHECK: vector.transfer_write %{{.*}}, %[[alloc]] + // CHECK: %[[res_tensor:.*]] = bufferization.to_tensor %[[alloc]] + 
%0 = vector.transfer_write %v, %A[%c0] : vector<4xf32>, tensor - // CHECK: return %[[res_tensor]] - return %0 : tensor - } + // CHECK: return %[[res_tensor]] + return %0 : tensor } // ----- @@ -76,15 +72,13 @@ module attributes {transform.with_named_sequence} { // CHECK-LABEL: func @test_function_analysis( // CHECK-SAME: %[[A:.*]]: tensor -module @payload attributes { transform.target_tag = "payload" } { - func.func @test_function_analysis(%A : tensor, %v : vector<4xf32>) -> (tensor) { - %c0 = arith.constant 0 : index - // CHECK: vector.transfer_write - // CHECK-SAME: {__inplace_operands_attr__ = ["none", "false", "none"]} - // CHECK-SAME: tensor - %0 = vector.transfer_write %v, %A[%c0] : vector<4xf32>, tensor - return %0 : tensor - } +func.func @test_function_analysis(%A : tensor, %v : vector<4xf32>) -> (tensor) { + %c0 = arith.constant 0 : index + // CHECK: vector.transfer_write + // CHECK-SAME: {__inplace_operands_attr__ = ["none", "false", "none"]} + // CHECK-SAME: tensor + %0 = vector.transfer_write %v, %A[%c0] : vector<4xf32>, tensor + return %0 : tensor } // ----- @@ -101,12 +95,10 @@ module attributes {transform.with_named_sequence} { } } -module @payload attributes { transform.target_tag = "payload" } { - func.func @test_unknown_op_failure() -> (tensor) { - // expected-error @+1 {{op was not bufferized}} - %0 = "test.dummy_op"() : () -> (tensor) - return %0 : tensor - } +func.func @test_unknown_op_failure() -> (tensor) { + // expected-error @+1 {{op was not bufferized}} + %0 = "test.dummy_op"() : () -> (tensor) + return %0 : tensor } // ----- @@ -119,7 +111,7 @@ module attributes {transform.with_named_sequence} { } } -module @payload attributes { transform.target_tag = "payload" } { +module { // CHECK-LABEL: func @test_function( // CHECK-SAME: %[[A:.*]]: tensor func.func @test_function(%A : tensor, %v : vector<4xf32>) -> (tensor) { @@ -154,13 +146,11 @@ module attributes {transform.with_named_sequence} { // CHECK-SAME: %[[A:.*]]: memref<12x9xf32>, // CHECK-SAME: %[[B:.*]]: memref<9x6xf32>, // CHECK-SAME: %[[C:.*]]: memref<12x6xf32>) -> memref<12x6xf32> { -module @payload attributes { transform.target_tag = "payload" } { - func.func @matmul(%A: tensor<12x9xf32>, %B: tensor<9x6xf32>, %C: tensor<12x6xf32>) -> tensor<12x6xf32> { - // CHECK: linalg.matmul ins(%[[A]], %[[B]] : memref<12x9xf32>, memref<9x6xf32>) outs(%[[C]] : memref<12x6xf32>) - %D = linalg.matmul ins(%A, %B: tensor<12x9xf32>, tensor<9x6xf32>) outs(%C: tensor<12x6xf32>) -> tensor<12x6xf32> - // CHECK: return %[[C]] : memref<12x6xf32> - return %D : tensor<12x6xf32> - } +func.func @matmul(%A: tensor<12x9xf32>, %B: tensor<9x6xf32>, %C: tensor<12x6xf32>) -> tensor<12x6xf32> { + // CHECK: linalg.matmul ins(%[[A]], %[[B]] : memref<12x9xf32>, memref<9x6xf32>) outs(%[[C]] : memref<12x6xf32>) + %D = linalg.matmul ins(%A, %B: tensor<12x9xf32>, tensor<9x6xf32>) outs(%C: tensor<12x6xf32>) -> tensor<12x6xf32> + // CHECK: return %[[C]] : memref<12x6xf32> + return %D : tensor<12x6xf32> } // ----- @@ -175,12 +165,10 @@ module attributes {transform.with_named_sequence} { } // Expect `bufferization.empty_tensor_to_alloc_tensor` to replace the tensor.empty. 
-module @payload attributes { transform.target_tag = "payload" } { - func.func @empty_to_tensor_alloc() -> tensor<2x2xf32> { - // CHECK: bufferization.alloc_tensor - %0 = tensor.empty() : tensor<2x2xf32> - return %0 : tensor<2x2xf32> - } +func.func @empty_to_tensor_alloc() -> tensor<2x2xf32> { + // CHECK: bufferization.alloc_tensor + %0 = tensor.empty() : tensor<2x2xf32> + return %0 : tensor<2x2xf32> } // ----- @@ -197,15 +185,13 @@ module attributes {transform.with_named_sequence} { // CHECK: tensor.extract_slice // CHECK: linalg.fill // CHECK: tensor.insert_slice -module @payload attributes { transform.target_tag = "payload" } { - func.func @empty_tensor_elimination( - %t: tensor<10xf32>, %f: f32) -> tensor<10xf32> { - %0 = tensor.empty() : tensor<5xf32> - %1 = linalg.fill ins(%f : f32) outs(%0 : tensor<5xf32>) -> tensor<5xf32> - %2 = tensor.insert_slice %1 into %t [1][5][1] - : tensor<5xf32> into tensor<10xf32> - return %2 : tensor<10xf32> - } +func.func @empty_tensor_elimination( + %t: tensor<10xf32>, %f: f32) -> tensor<10xf32> { + %0 = tensor.empty() : tensor<5xf32> + %1 = linalg.fill ins(%f : f32) outs(%0 : tensor<5xf32>) -> tensor<5xf32> + %2 = tensor.insert_slice %1 into %t [1][5][1] + : tensor<5xf32> into tensor<10xf32> + return %2 : tensor<10xf32> } // ----- @@ -222,14 +208,12 @@ module attributes {transform.with_named_sequence} { // CHECK: memref.alloca // CHECK: scf.for // CHECK: memref.store -module @payload attributes { transform.target_tag = "payload" } { - func.func @buffer_loop_hoisting(%lb: index, %ub: index, %step: index, %f: f32, %pos: index) { - scf.for %iv = %lb to %ub step %step { - %0 = memref.alloca() : memref<5xf32> - memref.store %f, %0[%pos] : memref<5xf32> - } - return +func.func @buffer_loop_hoisting(%lb: index, %ub: index, %step: index, %f: f32, %pos: index) { + scf.for %iv = %lb to %ub step %step { + %0 = memref.alloca() : memref<5xf32> + memref.store %f, %0[%pos] : memref<5xf32> } + return } // ----- @@ -247,12 +231,10 @@ module attributes {transform.with_named_sequence} { // Expect `bufferization.bufferize_to_allocation` to create an alloc. 
// CHECK-LABEL: func.func @empty_to_tensor_alloc() -module @payload attributes { transform.target_tag = "payload" } { - func.func @empty_to_tensor_alloc() -> tensor<2x2xf32> { - // CHECK-NEXT: %[[alloca:.*]] = memref.alloca() : memref<2x2xf32> - // CHECK-NEXT: %[[tensor:.*]] = bufferization.to_tensor %[[alloca]] restrict writable : memref<2x2xf32> - // CHECK-NEXT: return %[[tensor]] : tensor<2x2xf32> - %0 = bufferization.alloc_tensor() : tensor<2x2xf32> - return %0 : tensor<2x2xf32> - } +func.func @empty_to_tensor_alloc() -> tensor<2x2xf32> { + // CHECK-NEXT: %[[alloca:.*]] = memref.alloca() : memref<2x2xf32> + // CHECK-NEXT: %[[tensor:.*]] = bufferization.to_tensor %[[alloca]] restrict writable : memref<2x2xf32> + // CHECK-NEXT: return %[[tensor]] : tensor<2x2xf32> + %0 = bufferization.alloc_tensor() : tensor<2x2xf32> + return %0 : tensor<2x2xf32> } diff --git a/mlir/test/Dialect/LLVM/transform-e2e.mlir b/mlir/test/Dialect/LLVM/transform-e2e.mlir index 3e637a3ec49a4..c00b47fb936e9 100644 --- a/mlir/test/Dialect/LLVM/transform-e2e.mlir +++ b/mlir/test/Dialect/LLVM/transform-e2e.mlir @@ -1,17 +1,15 @@ -// RUN: mlir-opt %s --transform-interpreter="debug-payload-root-tag=payload" -test-transform-dialect-erase-schedule --test-lower-to-llvm --split-input-file | FileCheck %s +// RUN: mlir-opt %s --transform-interpreter -test-transform-dialect-erase-schedule --test-lower-to-llvm --split-input-file | FileCheck %s // CHECK-LABEL: llvm.func @matmul_tensors -module @payload attributes { transform.target_tag = "payload" } { - func.func @matmul_tensors( - %arg0: tensor<2x4xf32>, %arg1: tensor<4x6xf32>, %arg2: tensor<2x6xf32>) - -> tensor<2x6xf32> { - // CHECK-NOT: linalg - // CHECK: llvm.intr.fmuladd{{.*}} - %0 = linalg.matmul ins(%arg0, %arg1: tensor<2x4xf32>, tensor<4x6xf32>) - outs(%arg2: tensor<2x6xf32>) - -> tensor<2x6xf32> - return %0 : tensor<2x6xf32> - } +func.func @matmul_tensors( + %arg0: tensor<2x4xf32>, %arg1: tensor<4x6xf32>, %arg2: tensor<2x6xf32>) + -> tensor<2x6xf32> { +// CHECK-NOT: linalg +// CHECK: llvm.intr.fmuladd{{.*}} + %0 = linalg.matmul ins(%arg0, %arg1: tensor<2x4xf32>, tensor<4x6xf32>) + outs(%arg2: tensor<2x6xf32>) + -> tensor<2x6xf32> + return %0 : tensor<2x6xf32> } module attributes {transform.with_named_sequence} { diff --git a/mlir/test/Dialect/Linalg/matmul-shared-memory-padding.mlir b/mlir/test/Dialect/Linalg/matmul-shared-memory-padding.mlir index 9c223737750a9..3f8d2ea06641e 100644 --- a/mlir/test/Dialect/Linalg/matmul-shared-memory-padding.mlir +++ b/mlir/test/Dialect/Linalg/matmul-shared-memory-padding.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt --split-input-file --transform-interpreter="debug-payload-root-tag=payload" %s | FileCheck %s +// RUN: mlir-opt --split-input-file --transform-interpreter %s | FileCheck %s // CHECK-LABEL: func @matmul_divisible // CHECK: scf.forall @@ -24,21 +24,19 @@ // CHECK: scf.forall // CHECK: vector.transfer_read // CHECK: vector.transfer_write -module @payload attributes { transform.target_tag = "payload" } { - func.func @matmul_divisible(%A: tensor<1024x1024xf32>, - %B: tensor<1024x1024xf32>, - %C: tensor<1024x1024xf32>) +func.func @matmul_divisible(%A: tensor<1024x1024xf32>, + %B: tensor<1024x1024xf32>, + %C: tensor<1024x1024xf32>) + -> tensor<1024x1024xf32> +{ + %cst = arith.constant 0.000000e+00 : f32 + %0 = linalg.fill ins(%cst : f32) + outs(%C : tensor<1024x1024xf32>) -> tensor<1024x1024xf32> - { - %cst = arith.constant 0.000000e+00 : f32 - %0 = linalg.fill ins(%cst : f32) - outs(%C : tensor<1024x1024xf32>) - -> 
tensor<1024x1024xf32> - %1 = linalg.matmul ins(%A, %B : tensor<1024x1024xf32>, tensor<1024x1024xf32>) - outs(%0 : tensor<1024x1024xf32>) - -> tensor<1024x1024xf32> - return %1 : tensor<1024x1024xf32> - } + %1 = linalg.matmul ins(%A, %B : tensor<1024x1024xf32>, tensor<1024x1024xf32>) + outs(%0 : tensor<1024x1024xf32>) + -> tensor<1024x1024xf32> + return %1 : tensor<1024x1024xf32> } module attributes {transform.with_named_sequence} { @@ -145,21 +143,19 @@ module attributes {transform.with_named_sequence} { // CHECK: linalg.matmul // CHECK: vector.transfer_read // CHECK: vector.transfer_write -module @payload attributes { transform.target_tag = "payload" } { func.func @matmul_not_divisible(%A: tensor<1023x1023xf32>, - %B: tensor<1023x1023xf32>, - %C: tensor<1023x1023xf32>) + %B: tensor<1023x1023xf32>, + %C: tensor<1023x1023xf32>) + -> tensor<1023x1023xf32> +{ + %cst = arith.constant 0.000000e+00 : f32 + %0 = linalg.fill ins(%cst : f32) + outs(%C : tensor<1023x1023xf32>) -> tensor<1023x1023xf32> - { - %cst = arith.constant 0.000000e+00 : f32 - %0 = linalg.fill ins(%cst : f32) - outs(%C : tensor<1023x1023xf32>) - -> tensor<1023x1023xf32> - %1 = linalg.matmul ins(%A, %B : tensor<1023x1023xf32>, tensor<1023x1023xf32>) - outs(%0 : tensor<1023x1023xf32>) - -> tensor<1023x1023xf32> - return %1 : tensor<1023x1023xf32> - } + %1 = linalg.matmul ins(%A, %B : tensor<1023x1023xf32>, tensor<1023x1023xf32>) + outs(%0 : tensor<1023x1023xf32>) + -> tensor<1023x1023xf32> + return %1 : tensor<1023x1023xf32> } module attributes {transform.with_named_sequence} { diff --git a/mlir/test/Dialect/Linalg/pad-to-specific-memory-space.mlir b/mlir/test/Dialect/Linalg/pad-to-specific-memory-space.mlir index 5e5657980ba12..f2e9e839b7c46 100644 --- a/mlir/test/Dialect/Linalg/pad-to-specific-memory-space.mlir +++ b/mlir/test/Dialect/Linalg/pad-to-specific-memory-space.mlir @@ -1,5 +1,5 @@ -// RUN: mlir-opt --transform-interpreter="debug-payload-root-tag=payload" -cse -canonicalize -split-input-file -verify-diagnostics %s | FileCheck %s +// RUN: mlir-opt --transform-interpreter -cse -canonicalize -split-input-file -verify-diagnostics %s | FileCheck %s #map = affine_map<()[s0] -> (-s0 + 12, 7)> @@ -7,45 +7,43 @@ // CHECK-SAME: %[[arg0:.*]]: memref<24x12xf32, strided<[?, ?], offset: ?>>, // CHECK-SAME: %[[arg1:.*]]: memref<12x25xf32, strided<[?, ?], offset: ?>>, // CHECK-SAME: %[[arg2:.*]]: memref<24x25xf32, strided<[?, ?], offset: ?>>, -module @payload attributes { transform.target_tag = "payload" } { - func.func @pad_to_memory_space(%arg0: tensor<24x12xf32>, - %arg1: tensor<12x25xf32>, - %arg2: tensor<24x25xf32>, - %iv0 : index, %iv1 : index, - %iv2 : index) -> tensor<24x25xf32> { - %0 = affine.min #map()[%iv2] - - // CHECK: %[[s0:.*]] = memref.subview %[[arg0]] - %1 = tensor.extract_slice %arg0[%iv0, %iv2] [4, %0] [1, 1] : tensor<24x12xf32> to tensor<4x?xf32> - // CHECK: %[[s1:.*]] = memref.subview %[[arg1]] - %2 = tensor.extract_slice %arg1[%iv2, %iv1] [%0, 5] [1, 1] : tensor<12x25xf32> to tensor - // CHECK: %[[s2:.*]] = memref.subview %[[arg2]] - %3 = tensor.extract_slice %arg2[%iv0, %iv1] [4, 5] [1, 1] : tensor<24x25xf32> to tensor<4x5xf32> - - // CHECK: %[[alloc0:.*]] = memref.alloc() : memref<4x7xf32, 3> - // CHECK: linalg.fill {{.*}} outs(%[[alloc0]] - // CHECK: %[[alloc0_view:.*]] = memref.subview %[[alloc0]][0, 0] [4, %{{.*}}] [1, 1] - // CHECK: memref.copy %[[s0]], %[[alloc0_view]] - - // CHECK: %[[alloc1:.*]] = memref.alloc() : memref<7x5xf32, 3> - // CHECK: linalg.fill {{.*}} outs(%[[alloc1]] - // CHECK: 
%[[alloc1_view:.*]] = memref.subview %[[alloc1]][0, 0] [%{{.*}}, 5] [1, 1] - // CHECK: memref.copy %[[s1]], %[[alloc1_view]] - - // CHECK: %[[alloc2:.*]] = memref.alloc() : memref<4x5xf32, 3> - // CHECK-NOT: linalg.fill {{.*}} outs(%[[alloc2]] - // No subview because there is 0 padding - // CHECK: memref.copy %[[s2]], %[[alloc2]] - - // CHECK: linalg.matmul ins(%[[alloc0]], %[[alloc1]] : {{.*}}) outs(%[[alloc2]] : {{.*}}) - // Copy back result. - // CHECK: memref.copy %[[alloc2]], %[[s2]] - %4 = linalg.matmul ins(%1, %2 : tensor<4x?xf32>, tensor) outs(%3 : tensor<4x5xf32>) -> tensor<4x5xf32> - - // insert_slice bufferizes to a no-op. - %5 = tensor.insert_slice %4 into %arg2[%iv0, %iv1] [4, 5] [1, 1] : tensor<4x5xf32> into tensor<24x25xf32> - func.return %5 : tensor<24x25xf32> - } +func.func @pad_to_memory_space(%arg0: tensor<24x12xf32>, + %arg1: tensor<12x25xf32>, + %arg2: tensor<24x25xf32>, + %iv0 : index, %iv1 : index, + %iv2 : index) -> tensor<24x25xf32> { + %0 = affine.min #map()[%iv2] + + // CHECK: %[[s0:.*]] = memref.subview %[[arg0]] + %1 = tensor.extract_slice %arg0[%iv0, %iv2] [4, %0] [1, 1] : tensor<24x12xf32> to tensor<4x?xf32> + // CHECK: %[[s1:.*]] = memref.subview %[[arg1]] + %2 = tensor.extract_slice %arg1[%iv2, %iv1] [%0, 5] [1, 1] : tensor<12x25xf32> to tensor + // CHECK: %[[s2:.*]] = memref.subview %[[arg2]] + %3 = tensor.extract_slice %arg2[%iv0, %iv1] [4, 5] [1, 1] : tensor<24x25xf32> to tensor<4x5xf32> + + // CHECK: %[[alloc0:.*]] = memref.alloc() : memref<4x7xf32, 3> + // CHECK: linalg.fill {{.*}} outs(%[[alloc0]] + // CHECK: %[[alloc0_view:.*]] = memref.subview %[[alloc0]][0, 0] [4, %{{.*}}] [1, 1] + // CHECK: memref.copy %[[s0]], %[[alloc0_view]] + + // CHECK: %[[alloc1:.*]] = memref.alloc() : memref<7x5xf32, 3> + // CHECK: linalg.fill {{.*}} outs(%[[alloc1]] + // CHECK: %[[alloc1_view:.*]] = memref.subview %[[alloc1]][0, 0] [%{{.*}}, 5] [1, 1] + // CHECK: memref.copy %[[s1]], %[[alloc1_view]] + + // CHECK: %[[alloc2:.*]] = memref.alloc() : memref<4x5xf32, 3> + // CHECK-NOT: linalg.fill {{.*}} outs(%[[alloc2]] + // No subview because there is 0 padding + // CHECK: memref.copy %[[s2]], %[[alloc2]] + + // CHECK: linalg.matmul ins(%[[alloc0]], %[[alloc1]] : {{.*}}) outs(%[[alloc2]] : {{.*}}) + // Copy back result. + // CHECK: memref.copy %[[alloc2]], %[[s2]] + %4 = linalg.matmul ins(%1, %2 : tensor<4x?xf32>, tensor) outs(%3 : tensor<4x5xf32>) -> tensor<4x5xf32> + + // insert_slice bufferizes to a no-op. 
+ %5 = tensor.insert_slice %4 into %arg2[%iv0, %iv1] [4, 5] [1, 1] : tensor<4x5xf32> into tensor<24x25xf32> + func.return %5 : tensor<24x25xf32> } module attributes {transform.with_named_sequence} { @@ -71,42 +69,40 @@ module attributes {transform.with_named_sequence} { // CHECK-SAME: %[[arg0:.*]]: memref<24x12xf32, strided<[?, ?], offset: ?>>, // CHECK-SAME: %[[arg1:.*]]: memref<12x25xf32, strided<[?, ?], offset: ?>>, // CHECK-SAME: %[[arg2:.*]]: memref<24x25xf32, strided<[?, ?], offset: ?>>, -module @payload attributes { transform.target_tag = "payload" } { - func.func @vectorize_and_bufferize_pad(%arg0: tensor<24x12xf32>, - %arg1: tensor<12x25xf32>, - %arg2: tensor<24x25xf32>, - %iv0 : index, %iv1 : index, - %iv2 : index) -> tensor<24x25xf32> { - %0 = affine.min #map()[%iv2] - - // CHECK: %[[s0:.*]] = memref.subview %[[arg0]] - %1 = tensor.extract_slice %arg0[%iv0, %iv2] [4, %0] [1, 1] : tensor<24x12xf32> to tensor<4x?xf32> - // CHECK: %[[s1:.*]] = memref.subview %[[arg1]] - %2 = tensor.extract_slice %arg1[%iv2, %iv1] [%0, 5] [1, 1] : tensor<12x25xf32> to tensor - // CHECK: %[[s2:.*]] = memref.subview %[[arg2]] - %3 = tensor.extract_slice %arg2[%iv0, %iv1] [4, 5] [1, 1] : tensor<24x25xf32> to tensor<4x5xf32> - - // CHECK: %[[v0:.*]] = vector.mask {{.*}} { vector.transfer_read %[[s0]] - // CHECK: %[[alloc0:.*]] = memref.alloc() : memref<4x7xf32, 3> - // CHECK: vector.mask {{.*}} { vector.transfer_write %[[v0]], %[[alloc0]] - - // CHECK: %[[v1:.*]] = vector.mask {{.*}} { vector.transfer_read %[[s1]] - // CHECK: %[[alloc1:.*]] = memref.alloc() : memref<7x5xf32, 3> - // CHECK: vector.mask {{.*}} { vector.transfer_write %[[v1]], %[[alloc1]] - - // CHECK: %[[v2:.*]] = vector.mask {{.*}} { vector.transfer_read %[[s2]] - // CHECK: %[[alloc2:.*]] = memref.alloc() : memref<4x5xf32, 3> - // CHECK: vector.mask {{.*}} { vector.transfer_write %[[v2]], %[[alloc0]] - - // CHECK: linalg.matmul ins(%[[alloc0]], %[[alloc1]] : {{.*}}) outs(%[[alloc2]] : {{.*}}) - // Copy back result. - // CHECK: memref.copy %[[alloc2]], %[[s2]] - %4 = linalg.matmul ins(%1, %2 : tensor<4x?xf32>, tensor) outs(%3 : tensor<4x5xf32>) -> tensor<4x5xf32> - - // insert_slice bufferizes to a no-op. 
- %5 = tensor.insert_slice %4 into %arg2[%iv0, %iv1] [4, 5] [1, 1] : tensor<4x5xf32> into tensor<24x25xf32> - func.return %5 : tensor<24x25xf32> - } +func.func @vectorize_and_bufferize_pad(%arg0: tensor<24x12xf32>, + %arg1: tensor<12x25xf32>, + %arg2: tensor<24x25xf32>, + %iv0 : index, %iv1 : index, + %iv2 : index) -> tensor<24x25xf32> { + %0 = affine.min #map()[%iv2] + + // CHECK: %[[s0:.*]] = memref.subview %[[arg0]] + %1 = tensor.extract_slice %arg0[%iv0, %iv2] [4, %0] [1, 1] : tensor<24x12xf32> to tensor<4x?xf32> + // CHECK: %[[s1:.*]] = memref.subview %[[arg1]] + %2 = tensor.extract_slice %arg1[%iv2, %iv1] [%0, 5] [1, 1] : tensor<12x25xf32> to tensor + // CHECK: %[[s2:.*]] = memref.subview %[[arg2]] + %3 = tensor.extract_slice %arg2[%iv0, %iv1] [4, 5] [1, 1] : tensor<24x25xf32> to tensor<4x5xf32> + + // CHECK: %[[v0:.*]] = vector.mask {{.*}} { vector.transfer_read %[[s0]] + // CHECK: %[[alloc0:.*]] = memref.alloc() : memref<4x7xf32, 3> + // CHECK: vector.mask {{.*}} { vector.transfer_write %[[v0]], %[[alloc0]] + + // CHECK: %[[v1:.*]] = vector.mask {{.*}} { vector.transfer_read %[[s1]] + // CHECK: %[[alloc1:.*]] = memref.alloc() : memref<7x5xf32, 3> + // CHECK: vector.mask {{.*}} { vector.transfer_write %[[v1]], %[[alloc1]] + + // CHECK: %[[v2:.*]] = vector.mask {{.*}} { vector.transfer_read %[[s2]] + // CHECK: %[[alloc2:.*]] = memref.alloc() : memref<4x5xf32, 3> + // CHECK: vector.mask {{.*}} { vector.transfer_write %[[v2]], %[[alloc0]] + + // CHECK: linalg.matmul ins(%[[alloc0]], %[[alloc1]] : {{.*}}) outs(%[[alloc2]] : {{.*}}) + // Copy back result. + // CHECK: memref.copy %[[alloc2]], %[[s2]] + %4 = linalg.matmul ins(%1, %2 : tensor<4x?xf32>, tensor) outs(%3 : tensor<4x5xf32>) -> tensor<4x5xf32> + + // insert_slice bufferizes to a no-op. + %5 = tensor.insert_slice %4 into %arg2[%iv0, %iv1] [4, 5] [1, 1] : tensor<4x5xf32> into tensor<24x25xf32> + func.return %5 : tensor<24x25xf32> } module attributes {transform.with_named_sequence} { diff --git a/mlir/test/Dialect/Vector/transform-vector.mlir b/mlir/test/Dialect/Vector/transform-vector.mlir index 0439844dc66ca..4b38db79bff3e 100644 --- a/mlir/test/Dialect/Vector/transform-vector.mlir +++ b/mlir/test/Dialect/Vector/transform-vector.mlir @@ -1,18 +1,16 @@ -// RUN: mlir-opt --transform-interpreter="debug-payload-root-tag=payload" %s --split-input-file | FileCheck %s +// RUN: mlir-opt %s --transform-interpreter --split-input-file | FileCheck %s // CHECK-LABEL: func @matmul_tensors -module @payload attributes { transform.target_tag = "payload" } { - func.func @matmul_tensors( - %arg0: tensor<8x16xf32>, %arg1: tensor<16x32xf32>, %arg2: tensor<8x32xf32>) - -> tensor<8x32xf32> { - // CHECK-NOT: linalg - // CHECK: vector.extract {{.*}} : vector<4xf32> from vector<8x4xf32> - // CHECK: vector.store {{.*}} : memref<8x32xf32>, vector<4xf32> - %0 = linalg.matmul ins(%arg0, %arg1: tensor<8x16xf32>, tensor<16x32xf32>) - outs(%arg2: tensor<8x32xf32>) - -> tensor<8x32xf32> - return %0 : tensor<8x32xf32> - } +func.func @matmul_tensors( + %arg0: tensor<8x16xf32>, %arg1: tensor<16x32xf32>, %arg2: tensor<8x32xf32>) + -> tensor<8x32xf32> { +// CHECK-NOT: linalg +// CHECK: vector.extract {{.*}} : vector<4xf32> from vector<8x4xf32> +// CHECK: vector.store {{.*}} : memref<8x32xf32>, vector<4xf32> + %0 = linalg.matmul ins(%arg0, %arg1: tensor<8x16xf32>, tensor<16x32xf32>) + outs(%arg2: tensor<8x32xf32>) + -> tensor<8x32xf32> + return %0 : tensor<8x32xf32> } module attributes {transform.with_named_sequence} { @@ -78,13 +76,11 @@ module attributes 
{transform.with_named_sequence} { // CHECK-SAME: iterator_types = ["parallel", "parallel", "reduction"], kind = #vector.kind} // CHECK-SAME: %[[ARG0]], %[[ARG1]], %[[ARG2]] : vector<64x64xf16>, vector<64x64xf16> into vector<64x64xf32> // CHECK-NEXT: return %[[R]] : vector<64x64xf32> -module @payload attributes { transform.target_tag = "payload" } { - func.func @fold_arith_extf_into_contract(%arg0: vector<64x64xf16>, %arg1: vector<64x64xf16>, %arg2: vector<64x64xf32>) -> vector<64x64xf32> { - %lhs_f32 = arith.extf %arg0 : vector<64x64xf16> to vector<64x64xf32> - %rhs_f32 = arith.extf %arg1 : vector<64x64xf16> to vector<64x64xf32> - %result = vector.contract {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>], iterator_types = ["parallel", "parallel", "reduction"], kind = #vector.kind} %lhs_f32, %rhs_f32, %arg2 : vector<64x64xf32>, vector<64x64xf32> into vector<64x64xf32> - return %result : vector<64x64xf32> - } +func.func @fold_arith_extf_into_contract(%arg0: vector<64x64xf16>, %arg1: vector<64x64xf16>, %arg2: vector<64x64xf32>) -> vector<64x64xf32> { + %lhs_f32 = arith.extf %arg0 : vector<64x64xf16> to vector<64x64xf32> + %rhs_f32 = arith.extf %arg1 : vector<64x64xf16> to vector<64x64xf32> + %result = vector.contract {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>], iterator_types = ["parallel", "parallel", "reduction"], kind = #vector.kind} %lhs_f32, %rhs_f32, %arg2 : vector<64x64xf32>, vector<64x64xf32> into vector<64x64xf32> + return %result : vector<64x64xf32> } module attributes {transform.with_named_sequence} { @@ -99,32 +95,30 @@ module attributes {transform.with_named_sequence} { // ----- -module @payload attributes { transform.target_tag = "payload" } { - // CHECK-LABEL: func.func @arith_to_outerproduct_scalable_i32 - // CHECK-SAME: %[[LHS:.*]]: vector<[4]xi32>, - // CHECK-SAME: %[[RHS:.*]]: vector<[4]xi32>) -> vector<[4]x[4]xi32> { - // CHECK: %[[RES:.*]] = vector.outerproduct %[[LHS]], %[[RHS]] : vector<[4]xi32>, vector<[4]xi32> - // CHECK: return %[[RES]] : vector<[4]x[4]xi32> - func.func @arith_to_outerproduct_scalable_i32(%lhs: vector<[4]xi32>, %rhs: vector<[4]xi32>) -> vector<[4]x[4]xi32> { - %lhsBcast = vector.broadcast %lhs : vector<[4]xi32> to vector<[4]x[4]xi32> - %lhsT = vector.transpose %lhsBcast, [1, 0] : vector<[4]x[4]xi32> to vector<[4]x[4]xi32> - %rhsBcast = vector.broadcast %rhs : vector<[4]xi32> to vector<[4]x[4]xi32> - %mul = arith.muli %lhsT, %rhsBcast : vector<[4]x[4]xi32> - return %mul: vector<[4]x[4]xi32> - } +// CHECK-LABEL: func.func @arith_to_outerproduct_scalable_i32 +// CHECK-SAME: %[[LHS:.*]]: vector<[4]xi32>, +// CHECK-SAME: %[[RHS:.*]]: vector<[4]xi32>) -> vector<[4]x[4]xi32> { +// CHECK: %[[RES:.*]] = vector.outerproduct %[[LHS]], %[[RHS]] : vector<[4]xi32>, vector<[4]xi32> +// CHECK: return %[[RES]] : vector<[4]x[4]xi32> +func.func @arith_to_outerproduct_scalable_i32(%lhs: vector<[4]xi32>, %rhs: vector<[4]xi32>) -> vector<[4]x[4]xi32> { + %lhsBcast = vector.broadcast %lhs : vector<[4]xi32> to vector<[4]x[4]xi32> + %lhsT = vector.transpose %lhsBcast, [1, 0] : vector<[4]x[4]xi32> to vector<[4]x[4]xi32> + %rhsBcast = vector.broadcast %rhs : vector<[4]xi32> to vector<[4]x[4]xi32> + %mul = arith.muli %lhsT, %rhsBcast : vector<[4]x[4]xi32> + return %mul: vector<[4]x[4]xi32> +} - // CHECK-LABEL: func.func @arith_to_outerproduct_trans_rhs_f32 - // CHECK-SAME: %[[LHS:.*]]: vector<16xf32>, - // 
CHECK-SAME: %[[RHS:.*]]: vector<8xf32>) -> vector<8x16xf32> { - // CHECK: %[[RES:.*]] = vector.outerproduct %[[RHS]], %[[LHS]] : vector<8xf32>, vector<16xf32> - // CHECK: return %[[RES]] : vector<8x16xf32> - func.func @arith_to_outerproduct_trans_rhs_f32(%lhs: vector<16xf32>, %rhs: vector<8xf32>) -> vector<8x16xf32> { - %rhsBcast = vector.broadcast %rhs : vector<8xf32> to vector<16x8xf32> - %rhsT = vector.transpose %rhsBcast, [1, 0] : vector<16x8xf32> to vector<8x16xf32> - %lhsBcast = vector.broadcast %lhs : vector<16xf32> to vector<8x16xf32> - %mul = arith.mulf %lhsBcast, %rhsT : vector<8x16xf32> - return %mul: vector<8x16xf32> - } +// CHECK-LABEL: func.func @arith_to_outerproduct_trans_rhs_f32 +// CHECK-SAME: %[[LHS:.*]]: vector<16xf32>, +// CHECK-SAME: %[[RHS:.*]]: vector<8xf32>) -> vector<8x16xf32> { +// CHECK: %[[RES:.*]] = vector.outerproduct %[[RHS]], %[[LHS]] : vector<8xf32>, vector<16xf32> +// CHECK: return %[[RES]] : vector<8x16xf32> +func.func @arith_to_outerproduct_trans_rhs_f32(%lhs: vector<16xf32>, %rhs: vector<8xf32>) -> vector<8x16xf32> { + %rhsBcast = vector.broadcast %rhs : vector<8xf32> to vector<16x8xf32> + %rhsT = vector.transpose %rhsBcast, [1, 0] : vector<16x8xf32> to vector<8x16xf32> + %lhsBcast = vector.broadcast %lhs : vector<16xf32> to vector<8x16xf32> + %mul = arith.mulf %lhsBcast, %rhsT : vector<8x16xf32> + return %mul: vector<8x16xf32> } module attributes {transform.with_named_sequence} { diff --git a/mlir/test/Examples/transform/ChH/full.mlir b/mlir/test/Examples/transform/ChH/full.mlir index 85dbf67023323..259475ebdbf49 100644 --- a/mlir/test/Examples/transform/ChH/full.mlir +++ b/mlir/test/Examples/transform/ChH/full.mlir @@ -1,6 +1,8 @@ -// RUN: mlir-opt %s --transform-interpreter="debug-payload-root-tag=payload" \ -// RUN: --test-transform-dialect-erase-schedule |\ -// RUN: mlir-opt -pass-pipeline='builtin.module(builtin.module(math-uplift-to-fma,convert-bufferization-to-memref,test-lower-to-llvm))' - |\ +// RUN: mlir-opt %s --transform-interpreter \ +// RUN: --test-transform-dialect-erase-schedule \ +// RUN: --math-uplift-to-fma \ +// RUN: --convert-bufferization-to-memref \ +// RUN: --test-lower-to-llvm |\ // RUN: FileCheck %s // Fixed-size tensor types to be used in convolution. @@ -17,7 +19,6 @@ // tensors annotated with attributes from the `bufferization` dialect. These // attributes hint the bufferization pass to assume buffers can be directly // used for these tensors without reshaping. -module @payload attributes { transform.target_tag = "payload" } { func.func @conv( %input: !tinput {bufferization.writable = false, bufferization.access = "read", @@ -83,7 +84,7 @@ func.func @conv( return %relued : !toutput } -} + // Module containing the transformation script to be applied. The attribute // is required to correctly verify the use of named (macro-like) sequences. module attributes { transform.with_named_sequence } { From 2ccac07bf22d17029d4437b0a727dd55c8c86d56 Mon Sep 17 00:00:00 2001 From: Dmitry Polukhin <34227995+dmpolukhin@users.noreply.github.com> Date: Wed, 25 Sep 2024 08:31:49 +0100 Subject: [PATCH 110/212] [C++20][Modules] Fix crash when function and lambda inside loaded from different modules (#109167) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Summary: Because AST loading code is lazy and happens in unpredictable order, it is possible that a function and lambda inside the function can be loaded from different modules. 
As a result, the captured DeclRefExpr won’t match the corresponding VarDecl inside the function. This situation is reflected in the AST as follows: ``` FunctionDecl 0x555564f4aff0 line:33:35 imported in ./thrift_cpp2_base.h hidden tryTo 'Expected ()' inline |-also in ./folly-conv.h `-CompoundStmt 0x555564f7cfc8 |-DeclStmt 0x555564f7ced8 | `-VarDecl 0x555564f7cef8 col:7 imported in ./thrift_cpp2_base.h hidden referenced result 'Tgt' cinit | `-IntegerLiteral 0x555564f7d080 'int' 0 |-CallExpr 0x555564f7cea8 '' | |-UnresolvedLookupExpr 0x555564f7bea0 '' lvalue (no ADL) = 'then_' 0x555564f7bef0 | |-CXXTemporaryObjectExpr 0x555564f7bcb0 'Expected':'folly::Expected' 'void () noexcept' zeroing | `-LambdaExpr 0x555564f7bc88 '(lambda at Conv.h:39:48)' | |-CXXRecordDecl 0x555564f76b88 col:48 imported in ./folly-conv.h hidden implicit class definition | | |-also in ./thrift_cpp2_base.h | | `-DefinitionData lambda empty standard_layout trivially_copyable literal can_const_default_init | | |-DefaultConstructor defaulted_is_constexpr | | |-CopyConstructor simple trivial has_const_param needs_implicit implicit_has_const_param | | |-MoveConstructor exists simple trivial needs_implicit | | |-CopyAssignment trivial has_const_param needs_implicit implicit_has_const_param | | |-MoveAssignment | | `-Destructor simple irrelevant trivial constexpr needs_implicit | `-CompoundStmt 0x555564f7d1a8 | `-ReturnStmt 0x555564f7d198 | `-DeclRefExpr 0x555564f7d0a0 'Tgt' lvalue Var 0x555564f7d0c8 'result' 'Tgt' refers_to_enclosing_variable_or_capture `-ReturnStmt 0x555564f7bc78 `-InitListExpr 0x555564f7bc38 'void' ``` This diff modifies the AST deserialization process to load lambdas within the canonical function declaration sooner, immediately following the function, ensuring that they are loaded from the same module. Re-land https://github.com/llvm/llvm-project/pull/104512 Added test case that caused crash due to multiple enclosed lambdas deserialization. Test Plan: check-clang --- .../include/clang/Serialization/ASTBitCodes.h | 6 ++ clang/include/clang/Serialization/ASTReader.h | 12 +++ clang/include/clang/Serialization/ASTWriter.h | 8 ++ clang/lib/Serialization/ASTReader.cpp | 11 +++ clang/lib/Serialization/ASTReaderDecl.cpp | 10 +++ clang/lib/Serialization/ASTWriter.cpp | 22 ++++++ clang/lib/Serialization/ASTWriterDecl.cpp | 5 ++ ...rash-instantiated-in-scope-cxx-modules.cpp | 76 +++++++++++++++++++ ...ash-instantiated-in-scope-cxx-modules2.cpp | 30 ++++++++ ...ash-instantiated-in-scope-cxx-modules3.cpp | 26 +++++++ 10 files changed, 206 insertions(+) create mode 100644 clang/test/Headers/crash-instantiated-in-scope-cxx-modules.cpp create mode 100644 clang/test/Headers/crash-instantiated-in-scope-cxx-modules2.cpp create mode 100644 clang/test/Headers/crash-instantiated-in-scope-cxx-modules3.cpp diff --git a/clang/include/clang/Serialization/ASTBitCodes.h b/clang/include/clang/Serialization/ASTBitCodes.h index 4410df296d8ef..5be33ae0ed1b9 100644 --- a/clang/include/clang/Serialization/ASTBitCodes.h +++ b/clang/include/clang/Serialization/ASTBitCodes.h @@ -724,6 +724,12 @@ enum ASTRecordTypes { /// Record code for vtables to emit. VTABLES_TO_EMIT = 70, + + /// Record code for the FunctionDecl to lambdas mapping. These lambdas have to + /// be loaded right after the function they belong to. It is required to have + /// canonical declaration for the lambda class from the same module as + /// enclosing function. + FUNCTION_DECL_TO_LAMBDAS_MAP = 71, }; /// Record types used within a source manager block. 
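The record introduced above is a flat array of declaration IDs with the shape `[FnID, N, LambdaID_1, ..., LambdaID_N, FnID', N', ...]`, which the reader walks to rebuild the map. Below is a minimal standalone sketch of that decoding, with plain integers and STL containers standing in for `GlobalDeclID` and the LLVM ADT types (an illustration of the layout, not the actual clang API):

```
#include <cstddef>
#include <cstdint>
#include <map>
#include <vector>

using DeclID = std::uint64_t; // stand-in for clang's GlobalDeclID

// Decode one FUNCTION_DECL_TO_LAMBDAS_MAP record: for each enclosing
// function ID, collect the lambda IDs that must be loaded along with it.
std::map<DeclID, std::vector<DeclID>>
decodeFunctionToLambdas(const std::vector<std::uint64_t> &Record) {
  std::map<DeclID, std::vector<DeclID>> Map;
  for (std::size_t I = 0, E = Record.size(); I != E; /* advanced in loop */) {
    DeclID Fn = Record[I++];           // ID of the enclosing FunctionDecl
    std::uint64_t Count = Record[I++]; // number of lambdas in that function
    auto &Lambdas = Map[Fn];
    Lambdas.reserve(Count);
    for (std::uint64_t J = 0; J != Count; ++J)
      Lambdas.push_back(Record[I++]);  // lambda class IDs, in emission order
  }
  return Map;
}
```

On the reader side, walking one entry of this map and calling `GetDecl` on each lambda ID (as `loadDeclUpdateRecords` does below) is what forces the lambdas in from the same module file as their function.
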
diff --git a/clang/include/clang/Serialization/ASTReader.h b/clang/include/clang/Serialization/ASTReader.h index 898f4392465fd..c1843218a4b8b 100644 --- a/clang/include/clang/Serialization/ASTReader.h +++ b/clang/include/clang/Serialization/ASTReader.h @@ -532,6 +532,18 @@ class ASTReader /// namespace as if it is not delayed. DelayedNamespaceOffsetMapTy DelayedNamespaceOffsetMap; + /// Mapping from FunctionDecl IDs to the corresponding lambda IDs. + /// + /// These lambdas have to be loaded right after the function they belong to. + /// It is required to have canonical declaration for lambda class from the + /// same module as enclosing function. This is required to correctly resolve + /// captured variables in the lambda. Without this, due to lazy + /// deserialization, canonical declarations for the function and lambdas can + /// be selected from different modules and DeclRefExprs may refer to the AST + /// nodes that don't exist in the function. + llvm::DenseMap> + FunctionToLambdasMap; + struct PendingUpdateRecord { Decl *D; GlobalDeclID ID; diff --git a/clang/include/clang/Serialization/ASTWriter.h b/clang/include/clang/Serialization/ASTWriter.h index 10a50b711043a..760866fd9de93 100644 --- a/clang/include/clang/Serialization/ASTWriter.h +++ b/clang/include/clang/Serialization/ASTWriter.h @@ -233,6 +233,14 @@ class ASTWriter : public ASTDeserializationListener, /// instead of comparing the result of `getDeclID()` or `GetDeclRef()`. llvm::SmallPtrSet PredefinedDecls; + /// Mapping from FunctionDecl to the list of lambda IDs inside the function. + /// + /// These lambdas have to be loaded right after the function they belong to. + /// In order to have canonical declaration for lambda class from the same + /// module as enclosing function during deserialization. + llvm::DenseMap> + FunctionToLambdasMap; + /// Offset of each declaration in the bitstream, indexed by /// the declaration's ID. std::vector DeclOffsets; diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp index ede3070787722..a369ad0be4795 100644 --- a/clang/lib/Serialization/ASTReader.cpp +++ b/clang/lib/Serialization/ASTReader.cpp @@ -3856,6 +3856,17 @@ llvm::Error ASTReader::ReadASTBlock(ModuleFile &F, break; } + case FUNCTION_DECL_TO_LAMBDAS_MAP: + for (unsigned I = 0, N = Record.size(); I != N; /*in loop*/) { + GlobalDeclID ID = ReadDeclID(F, Record, I); + auto &Lambdas = FunctionToLambdasMap[ID]; + unsigned NN = Record[I++]; + Lambdas.reserve(NN); + for (unsigned II = 0; II < NN; II++) + Lambdas.push_back(ReadDeclID(F, Record, I)); + } + break; + case OBJC_CATEGORIES_MAP: if (F.LocalNumObjCCategoriesInMap != 0) return llvm::createStringError( diff --git a/clang/lib/Serialization/ASTReaderDecl.cpp b/clang/lib/Serialization/ASTReaderDecl.cpp index 9272e23c7da3f..7cead2728ca93 100644 --- a/clang/lib/Serialization/ASTReaderDecl.cpp +++ b/clang/lib/Serialization/ASTReaderDecl.cpp @@ -4351,6 +4351,16 @@ void ASTReader::loadDeclUpdateRecords(PendingUpdateRecord &Record) { reader::ASTDeclContextNameLookupTrait(*this, *Update.Mod)); DC->setHasExternalVisibleStorage(true); } + + // Load any pending lambdas for the function. 
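+  // Loading them eagerly here keeps the lambdas in the same module file as
+  // their canonical FunctionDecl, so DeclRefExprs in the lambda bodies
+  // resolve to this function's VarDecls rather than to a copy deserialized
+  // from another module.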
+  if (auto *FD = dyn_cast<FunctionDecl>(D); FD && FD->isCanonicalDecl()) {
+    if (auto IT = FunctionToLambdasMap.find(ID);
+        IT != FunctionToLambdasMap.end()) {
+      for (auto LID : IT->second)
+        GetDecl(LID);
+      FunctionToLambdasMap.erase(IT);
+    }
+  }
 }
 
 void ASTReader::loadPendingDeclChain(Decl *FirstLocal, uint64_t LocalOffset) {
diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp
index 4ee14b1e26015..f326e3c2e2ff7 100644
--- a/clang/lib/Serialization/ASTWriter.cpp
+++ b/clang/lib/Serialization/ASTWriter.cpp
@@ -903,6 +903,7 @@ void ASTWriter::WriteBlockInfoBlock() {
   RECORD(PENDING_IMPLICIT_INSTANTIATIONS);
   RECORD(UPDATE_VISIBLE);
   RECORD(DELAYED_NAMESPACE_LEXICAL_VISIBLE_RECORD);
+  RECORD(FUNCTION_DECL_TO_LAMBDAS_MAP);
   RECORD(DECL_UPDATE_OFFSETS);
   RECORD(DECL_UPDATES);
   RECORD(CUDA_SPECIAL_DECL_REFS);
@@ -5707,6 +5708,27 @@ void ASTWriter::WriteDeclAndTypes(ASTContext &Context) {
   Stream.EmitRecord(DELAYED_NAMESPACE_LEXICAL_VISIBLE_RECORD,
                     DelayedNamespaceRecord);
 
+  if (!FunctionToLambdasMap.empty()) {
+    // TODO: an on-disk hash table for the function-to-lambda mapping might be
+    // more efficient because it allows lazy deserialization.
+    RecordData FunctionToLambdasMapRecord;
+    for (const auto &Pair : FunctionToLambdasMap) {
+      FunctionToLambdasMapRecord.push_back(
+          GetDeclRef(Pair.first).getRawValue());
+      FunctionToLambdasMapRecord.push_back(Pair.second.size());
+      for (const auto &Lambda : Pair.second)
+        FunctionToLambdasMapRecord.push_back(Lambda.getRawValue());
+    }
+
+    auto Abv = std::make_shared<llvm::BitCodeAbbrev>();
+    Abv->Add(llvm::BitCodeAbbrevOp(FUNCTION_DECL_TO_LAMBDAS_MAP));
+    Abv->Add(llvm::BitCodeAbbrevOp(llvm::BitCodeAbbrevOp::Array));
+    Abv->Add(llvm::BitCodeAbbrevOp(llvm::BitCodeAbbrevOp::VBR, 6));
+    unsigned FunctionToLambdaMapAbbrev = Stream.EmitAbbrev(std::move(Abv));
+    Stream.EmitRecord(FUNCTION_DECL_TO_LAMBDAS_MAP, FunctionToLambdasMapRecord,
+                      FunctionToLambdaMapAbbrev);
+  }
+
   const TranslationUnitDecl *TU = Context.getTranslationUnitDecl();
   // Create a lexical update block containing all of the declarations in the
   // translation unit that do not come from other AST files.
diff --git a/clang/lib/Serialization/ASTWriterDecl.cpp b/clang/lib/Serialization/ASTWriterDecl.cpp
index 555f6325da646..50c090b195d61 100644
--- a/clang/lib/Serialization/ASTWriterDecl.cpp
+++ b/clang/lib/Serialization/ASTWriterDecl.cpp
@@ -1521,6 +1521,11 @@ void ASTDeclWriter::VisitCXXRecordDecl(CXXRecordDecl *D) {
     } else {
      Record.push_back(0);
     }
+    // For lambdas inside a canonical FunctionDecl, remember the mapping.
+ if (auto FD = llvm::dyn_cast_or_null(D->getDeclContext()); + FD && FD->isCanonicalDecl()) { + Writer.FunctionToLambdasMap[FD].push_back(Writer.GetDeclRef(D)); + } } else { Record.push_back(CXXRecNotTemplate); } diff --git a/clang/test/Headers/crash-instantiated-in-scope-cxx-modules.cpp b/clang/test/Headers/crash-instantiated-in-scope-cxx-modules.cpp new file mode 100644 index 0000000000000..80844a58ad825 --- /dev/null +++ b/clang/test/Headers/crash-instantiated-in-scope-cxx-modules.cpp @@ -0,0 +1,76 @@ +// RUN: rm -fR %t +// RUN: split-file %s %t +// RUN: cd %t +// RUN: %clang_cc1 -std=c++20 -emit-header-unit -xc++-user-header -Werror=uninitialized folly-conv.h +// RUN: %clang_cc1 -std=c++20 -emit-header-unit -xc++-user-header -Werror=uninitialized thrift_cpp2_base.h +// RUN: %clang_cc1 -std=c++20 -emit-header-unit -xc++-user-header -Werror=uninitialized -fmodule-file=folly-conv.pcm -fmodule-file=thrift_cpp2_base.pcm logger_base.h + +//--- Conv.h +#pragma once + +template +_Up __declval(int); + +template +auto declval() noexcept -> decltype(__declval<_Tp>(0)); + +namespace folly { + +template +struct Expected { + template + auto thenOrThrow() -> decltype(declval()) { + return 1; + } +}; + +struct ExpectedHelper { + template + static constexpr Expected return_(T) { + return Expected(); + } + + template + static auto then_(This&&, Fn&&) + -> decltype(T::template return_((declval()(true), 0))) { + return Expected(); + } +}; + +template +inline Expected tryTo() { + Tgt result = 0; + // In build with asserts: + // clang/lib/Sema/SemaTemplateInstantiate.cpp: llvm::PointerUnion *clang::LocalInstantiationScope::findInstantiationOf(const Decl *): Assertion `isa(D) && "declaration not instantiated in this scope"' failed. + // In release build compilation error on the line below inside lambda: + // error: variable 'result' is uninitialized when used here [-Werror,-Wuninitialized] + ExpectedHelper::then_(Expected(), [&](bool) { return result; }); + return {}; +} + +} // namespace folly + +inline void bar() { + folly::tryTo(); +} +// expected-no-diagnostics + +//--- folly-conv.h +#pragma once +#include "Conv.h" +// expected-no-diagnostics + +//--- thrift_cpp2_base.h +#pragma once +#include "Conv.h" +// expected-no-diagnostics + +//--- logger_base.h +#pragma once +import "folly-conv.h"; +import "thrift_cpp2_base.h"; + +inline void foo() { + folly::tryTo(); +} +// expected-no-diagnostics diff --git a/clang/test/Headers/crash-instantiated-in-scope-cxx-modules2.cpp b/clang/test/Headers/crash-instantiated-in-scope-cxx-modules2.cpp new file mode 100644 index 0000000000000..5b1a904e928a6 --- /dev/null +++ b/clang/test/Headers/crash-instantiated-in-scope-cxx-modules2.cpp @@ -0,0 +1,30 @@ +// RUN: rm -fR %t +// RUN: split-file %s %t +// RUN: cd %t +// RUN: %clang_cc1 -std=c++20 -emit-header-unit -xc++-user-header header.h +// RUN: %clang_cc1 -std=c++20 -fmodule-file=header.pcm main.cpp + +//--- header.h +template +void f(T) {} + +class A { + virtual ~A(); +}; + +inline A::~A() { + f([](){}); +} + +struct B { + void g() { + f([](){ + [](){}; + }); + } +}; +// expected-no-diagnostics + +//--- main.cpp +import "header.h"; +// expected-no-diagnostics diff --git a/clang/test/Headers/crash-instantiated-in-scope-cxx-modules3.cpp b/clang/test/Headers/crash-instantiated-in-scope-cxx-modules3.cpp new file mode 100644 index 0000000000000..646ff9f745710 --- /dev/null +++ b/clang/test/Headers/crash-instantiated-in-scope-cxx-modules3.cpp @@ -0,0 +1,26 @@ +// RUN: %clang_cc1 %s -std=c++11 -emit-pch -o %t +// RUN: %clang_cc1 
%s -std=c++11 -include-pch %t -fsyntax-only -verify + +// expected-no-diagnostics +#ifndef HEADER +#define HEADER + +// No crash or assertion failure on multiple nested lambdas deserialization. +template +void b() { + [] { + []{ + []{ + []{ + []{ + }(); + }(); + }(); + }(); + }(); +} + +void foo() { + b(); +} +#endif From 0259f92711599c45d229fb12f6f51915fffac6bd Mon Sep 17 00:00:00 2001 From: Matthias Springer Date: Wed, 25 Sep 2024 09:33:15 +0200 Subject: [PATCH 111/212] [mlir][memref] Add builder that infers `reinterpret_cast` result type (#109432) Add a convenience builder that infers the result type of `memref.reinterpret_cast`. Note: It is not possible to remove the result type from all builder overloads because this op currently also allows certain operand/attribute + result type combinations that do not match. The op verifier should probably be made stricter, but that's a larger change that requires additional `memref.cast` ops in some places that build `reinterpret_cast` ops. --- .../mlir/Dialect/MemRef/IR/MemRefOps.td | 4 ++++ .../IR/BufferDeallocationOpInterface.cpp | 6 ++++-- mlir/lib/Dialect/MemRef/IR/MemRefOps.cpp | 18 ++++++++++++++++++ 3 files changed, 26 insertions(+), 2 deletions(-) diff --git a/mlir/include/mlir/Dialect/MemRef/IR/MemRefOps.td b/mlir/include/mlir/Dialect/MemRef/IR/MemRefOps.td index 2ff9d612a5efa..c50df6ccd9aa5 100644 --- a/mlir/include/mlir/Dialect/MemRef/IR/MemRefOps.td +++ b/mlir/include/mlir/Dialect/MemRef/IR/MemRefOps.td @@ -1407,6 +1407,10 @@ def MemRef_ReinterpretCastOp "OpFoldResult":$offset, "ArrayRef":$sizes, "ArrayRef":$strides, CArg<"ArrayRef", "{}">:$attrs)>, + // Build a ReinterpretCastOp and infer the result type. + OpBuilder<(ins "Value":$source, "OpFoldResult":$offset, + "ArrayRef":$sizes, "ArrayRef":$strides, + CArg<"ArrayRef", "{}">:$attrs)>, // Build a ReinterpretCastOp with static entries. OpBuilder<(ins "MemRefType":$resultType, "Value":$source, "int64_t":$offset, "ArrayRef":$sizes, diff --git a/mlir/lib/Dialect/Bufferization/IR/BufferDeallocationOpInterface.cpp b/mlir/lib/Dialect/Bufferization/IR/BufferDeallocationOpInterface.cpp index b197786c32054..51dfd84d9ac60 100644 --- a/mlir/lib/Dialect/Bufferization/IR/BufferDeallocationOpInterface.cpp +++ b/mlir/lib/Dialect/Bufferization/IR/BufferDeallocationOpInterface.cpp @@ -197,8 +197,10 @@ LogicalResult DeallocationState::getMemrefsAndConditionsToDeallocate( // that we can call extract_strided_metadata on it. if (auto unrankedMemRefTy = dyn_cast(memref.getType())) memref = builder.create( - loc, MemRefType::get({}, unrankedMemRefTy.getElementType()), memref, - 0, SmallVector{}, SmallVector{}); + loc, memref, + /*offset=*/builder.getIndexAttr(0), + /*sizes=*/ArrayRef{}, + /*strides=*/ArrayRef{}); // Use the `memref.extract_strided_metadata` operation to get the base // memref. 
This is needed because the same MemRef that was produced by the
diff --git a/mlir/lib/Dialect/MemRef/IR/MemRefOps.cpp b/mlir/lib/Dialect/MemRef/IR/MemRefOps.cpp
index 9c021d3613f1c..75b9729e63648 100644
--- a/mlir/lib/Dialect/MemRef/IR/MemRefOps.cpp
+++ b/mlir/lib/Dialect/MemRef/IR/MemRefOps.cpp
@@ -1832,6 +1832,24 @@ void ReinterpretCastOp::build(OpBuilder &b, OperationState &result,
                         b.getDenseI64ArrayAttr(staticStrides));
 }
 
+void ReinterpretCastOp::build(OpBuilder &b, OperationState &result,
+                              Value source, OpFoldResult offset,
+                              ArrayRef<OpFoldResult> sizes,
+                              ArrayRef<OpFoldResult> strides,
+                              ArrayRef<NamedAttribute> attrs) {
+  auto sourceType = cast(source.getType());
+  SmallVector<int64_t> staticOffsets, staticSizes, staticStrides;
+  SmallVector<Value> dynamicOffsets, dynamicSizes, dynamicStrides;
+  dispatchIndexOpFoldResults(offset, dynamicOffsets, staticOffsets);
+  dispatchIndexOpFoldResults(sizes, dynamicSizes, staticSizes);
+  dispatchIndexOpFoldResults(strides, dynamicStrides, staticStrides);
+  auto stridedLayout = StridedLayoutAttr::get(
+      b.getContext(), staticOffsets.front(), staticStrides);
+  auto resultType = MemRefType::get(staticSizes, sourceType.getElementType(),
+                                    stridedLayout, sourceType.getMemorySpace());
+  build(b, result, resultType, source, offset, sizes, strides, attrs);
+}
+
 void ReinterpretCastOp::build(OpBuilder &b, OperationState &result,
                               MemRefType resultType, Value source,
                               int64_t offset, ArrayRef<int64_t> sizes,

From b7b945b09cddad128e42d82d759e74f0cd5b0ee3 Mon Sep 17 00:00:00 2001
From: Matt Arsenault
Date: Wed, 25 Sep 2024 11:35:16 +0400
Subject: [PATCH 112/212] LiveRangeCalc: Pass output stream to verify

Restores output on this failure path after
71ca9fcb8dc9ea0e1e3a4a47820edc78c398a85e
---
 llvm/lib/CodeGen/LiveRangeCalc.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/CodeGen/LiveRangeCalc.cpp b/llvm/lib/CodeGen/LiveRangeCalc.cpp
index f7d9e5c44ac2e..e325e77189a6f 100644
--- a/llvm/lib/CodeGen/LiveRangeCalc.cpp
+++ b/llvm/lib/CodeGen/LiveRangeCalc.cpp
@@ -208,7 +208,7 @@ bool LiveRangeCalc::findReachingDefs(LiveRange &LR, MachineBasicBlock &UseMBB,
 #ifndef NDEBUG
   if (MBB->pred_empty()) {
-    MBB->getParent()->verify();
+    MBB->getParent()->verify(nullptr, nullptr, &errs());
     errs() << "Use of " << printReg(PhysReg, MRI->getTargetRegisterInfo())
            << " does not have a corresponding definition on every path:\n";
     const MachineInstr *MI = Indexes->getInstructionFromIndex(Use);
@@ -223,7 +223,7 @@ bool LiveRangeCalc::findReachingDefs(LiveRange &LR, MachineBasicBlock &UseMBB,
   for (MCRegAliasIterator Alias(PhysReg, TRI, false); !IsLiveIn && Alias.isValid(); ++Alias)
     IsLiveIn = MBB->isLiveIn(*Alias);
   if (!IsLiveIn) {
-    MBB->getParent()->verify();
+    MBB->getParent()->verify(nullptr, nullptr, &errs());
     errs() << "The register " << printReg(PhysReg, TRI)
            << " needs to be live in to " << printMBBReference(*MBB)
            << ", but is missing from the live-in list.\n";

From 8e3cde04cbf13d2aa2251acdd8adbae8d3edd43d Mon Sep 17 00:00:00 2001
From: WANG Rui
Date: Wed, 25 Sep 2024 15:31:24 +0800
Subject: [PATCH 113/212] [LoongArch][test] Add floating-point atomic
 load/store tests.
NFC --- .../ir-instruction/load-store-atomic.ll | 337 ++++++++++++++++++ 1 file changed, 337 insertions(+) diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/load-store-atomic.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/load-store-atomic.ll index 1af2b38d79943..9ef74e4960ce7 100644 --- a/llvm/test/CodeGen/LoongArch/ir-instruction/load-store-atomic.ll +++ b/llvm/test/CodeGen/LoongArch/ir-instruction/load-store-atomic.ll @@ -88,6 +88,50 @@ define ptr @load_acquire_ptr(ptr %ptr) { ret ptr %val } +define float @load_acquire_float(ptr %ptr) { +; LA32-LABEL: load_acquire_float: +; LA32: # %bb.0: +; LA32-NEXT: ld.w $a0, $a0, 0 +; LA32-NEXT: movgr2fr.w $fa0, $a0 +; LA32-NEXT: dbar 20 +; LA32-NEXT: ret +; +; LA64-LABEL: load_acquire_float: +; LA64: # %bb.0: +; LA64-NEXT: ld.w $a0, $a0, 0 +; LA64-NEXT: movgr2fr.w $fa0, $a0 +; LA64-NEXT: dbar 20 +; LA64-NEXT: ret + %val = load atomic float, ptr %ptr acquire, align 8 + ret float %val +} + +define double @load_acquire_double(ptr %ptr) { +; LA32-LABEL: load_acquire_double: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: .cfi_def_cfa_offset 16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: .cfi_offset 1, -4 +; LA32-NEXT: ori $a1, $zero, 2 +; LA32-NEXT: bl %plt(__atomic_load_8) +; LA32-NEXT: st.w $a1, $sp, 4 +; LA32-NEXT: st.w $a0, $sp, 0 +; LA32-NEXT: fld.d $fa0, $sp, 0 +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: load_acquire_double: +; LA64: # %bb.0: +; LA64-NEXT: ld.d $a0, $a0, 0 +; LA64-NEXT: movgr2fr.d $fa0, $a0 +; LA64-NEXT: dbar 20 +; LA64-NEXT: ret + %val = load atomic double, ptr %ptr acquire, align 8 + ret double %val +} + define i8 @load_unordered_i8(ptr %ptr) { ; LA32-LABEL: load_unordered_i8: ; LA32: # %bb.0: @@ -165,6 +209,47 @@ define ptr @load_unordered_ptr(ptr %ptr) { ret ptr %val } +define float @load_unordered_float(ptr %ptr) { +; LA32-LABEL: load_unordered_float: +; LA32: # %bb.0: +; LA32-NEXT: ld.w $a0, $a0, 0 +; LA32-NEXT: movgr2fr.w $fa0, $a0 +; LA32-NEXT: ret +; +; LA64-LABEL: load_unordered_float: +; LA64: # %bb.0: +; LA64-NEXT: ld.w $a0, $a0, 0 +; LA64-NEXT: movgr2fr.w $fa0, $a0 +; LA64-NEXT: ret + %val = load atomic float, ptr %ptr unordered, align 8 + ret float %val +} + +define double @load_unordered_double(ptr %ptr) { +; LA32-LABEL: load_unordered_double: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: .cfi_def_cfa_offset 16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: .cfi_offset 1, -4 +; LA32-NEXT: move $a1, $zero +; LA32-NEXT: bl %plt(__atomic_load_8) +; LA32-NEXT: st.w $a1, $sp, 4 +; LA32-NEXT: st.w $a0, $sp, 0 +; LA32-NEXT: fld.d $fa0, $sp, 0 +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: load_unordered_double: +; LA64: # %bb.0: +; LA64-NEXT: ld.d $a0, $a0, 0 +; LA64-NEXT: movgr2fr.d $fa0, $a0 +; LA64-NEXT: ret + %val = load atomic double, ptr %ptr unordered, align 8 + ret double %val +} + define i8 @load_monotonic_i8(ptr %ptr) { ; LA32-LABEL: load_monotonic_i8: ; LA32: # %bb.0: @@ -242,6 +327,47 @@ define ptr @load_monotonic_ptr(ptr %ptr) { ret ptr %val } +define float @load_monotonic_float(ptr %ptr) { +; LA32-LABEL: load_monotonic_float: +; LA32: # %bb.0: +; LA32-NEXT: ld.w $a0, $a0, 0 +; LA32-NEXT: movgr2fr.w $fa0, $a0 +; LA32-NEXT: ret +; +; LA64-LABEL: load_monotonic_float: +; LA64: # %bb.0: +; LA64-NEXT: ld.w $a0, $a0, 0 +; LA64-NEXT: movgr2fr.w $fa0, $a0 +; LA64-NEXT: 
ret + %val = load atomic float, ptr %ptr monotonic, align 8 + ret float %val +} + +define double @load_monotonic_double(ptr %ptr) { +; LA32-LABEL: load_monotonic_double: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: .cfi_def_cfa_offset 16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: .cfi_offset 1, -4 +; LA32-NEXT: move $a1, $zero +; LA32-NEXT: bl %plt(__atomic_load_8) +; LA32-NEXT: st.w $a1, $sp, 4 +; LA32-NEXT: st.w $a0, $sp, 0 +; LA32-NEXT: fld.d $fa0, $sp, 0 +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: load_monotonic_double: +; LA64: # %bb.0: +; LA64-NEXT: ld.d $a0, $a0, 0 +; LA64-NEXT: movgr2fr.d $fa0, $a0 +; LA64-NEXT: ret + %val = load atomic double, ptr %ptr monotonic, align 8 + ret double %val +} + define i8 @load_seq_cst_i8(ptr %ptr) { ; LA32-LABEL: load_seq_cst_i8: ; LA32: # %bb.0: @@ -328,6 +454,50 @@ define ptr @load_seq_cst_ptr(ptr %ptr) { ret ptr %val } +define float @load_seq_cst_float(ptr %ptr) { +; LA32-LABEL: load_seq_cst_float: +; LA32: # %bb.0: +; LA32-NEXT: ld.w $a0, $a0, 0 +; LA32-NEXT: movgr2fr.w $fa0, $a0 +; LA32-NEXT: dbar 16 +; LA32-NEXT: ret +; +; LA64-LABEL: load_seq_cst_float: +; LA64: # %bb.0: +; LA64-NEXT: ld.w $a0, $a0, 0 +; LA64-NEXT: movgr2fr.w $fa0, $a0 +; LA64-NEXT: dbar 16 +; LA64-NEXT: ret + %val = load atomic float, ptr %ptr seq_cst, align 8 + ret float %val +} + +define double @load_seq_cst_double(ptr %ptr) { +; LA32-LABEL: load_seq_cst_double: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: .cfi_def_cfa_offset 16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: .cfi_offset 1, -4 +; LA32-NEXT: ori $a1, $zero, 5 +; LA32-NEXT: bl %plt(__atomic_load_8) +; LA32-NEXT: st.w $a1, $sp, 4 +; LA32-NEXT: st.w $a0, $sp, 0 +; LA32-NEXT: fld.d $fa0, $sp, 0 +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: load_seq_cst_double: +; LA64: # %bb.0: +; LA64-NEXT: ld.d $a0, $a0, 0 +; LA64-NEXT: movgr2fr.d $fa0, $a0 +; LA64-NEXT: dbar 16 +; LA64-NEXT: ret + %val = load atomic double, ptr %ptr seq_cst, align 8 + ret double %val +} + define void @store_release_i8(ptr %ptr, i8 signext %v) { ; LA32-LABEL: store_release_i8: ; LA32: # %bb.0: @@ -411,6 +581,48 @@ define void @store_release_ptr(ptr %ptr, ptr %v) { ret void } +define void @store_release_float(ptr %ptr, float %v) { +; LA32-LABEL: store_release_float: +; LA32: # %bb.0: +; LA32-NEXT: movfr2gr.s $a1, $fa0 +; LA32-NEXT: dbar 18 +; LA32-NEXT: st.w $a1, $a0, 0 +; LA32-NEXT: ret +; +; LA64-LABEL: store_release_float: +; LA64: # %bb.0: +; LA64-NEXT: movfr2gr.s $a1, $fa0 +; LA64-NEXT: amswap_db.w $zero, $a1, $a0 +; LA64-NEXT: ret + store atomic float %v, ptr %ptr release, align 8 + ret void +} + +define void @store_release_double(ptr %ptr, double %v) { +; LA32-LABEL: store_release_double: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: .cfi_def_cfa_offset 16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: .cfi_offset 1, -4 +; LA32-NEXT: fst.d $fa0, $sp, 0 +; LA32-NEXT: ld.w $a1, $sp, 0 +; LA32-NEXT: ld.w $a2, $sp, 4 +; LA32-NEXT: ori $a3, $zero, 3 +; LA32-NEXT: bl %plt(__atomic_store_8) +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: store_release_double: +; LA64: # %bb.0: +; LA64-NEXT: movfr2gr.d $a1, $fa0 +; LA64-NEXT: amswap_db.d $zero, $a1, $a0 +; LA64-NEXT: ret + store 
atomic double %v, ptr %ptr release, align 8 + ret void +} + define void @store_unordered_i8(ptr %ptr, i8 signext %v) { ; LA32-LABEL: store_unordered_i8: ; LA32: # %bb.0: @@ -488,6 +700,47 @@ define void @store_unordered_ptr(ptr %ptr, ptr %v) { ret void } +define void @store_unordered_float(ptr %ptr, float %v) { +; LA32-LABEL: store_unordered_float: +; LA32: # %bb.0: +; LA32-NEXT: movfr2gr.s $a1, $fa0 +; LA32-NEXT: st.w $a1, $a0, 0 +; LA32-NEXT: ret +; +; LA64-LABEL: store_unordered_float: +; LA64: # %bb.0: +; LA64-NEXT: movfr2gr.s $a1, $fa0 +; LA64-NEXT: st.w $a1, $a0, 0 +; LA64-NEXT: ret + store atomic float %v, ptr %ptr unordered, align 8 + ret void +} + +define void @store_unordered_double(ptr %ptr, double %v) { +; LA32-LABEL: store_unordered_double: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: .cfi_def_cfa_offset 16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: .cfi_offset 1, -4 +; LA32-NEXT: fst.d $fa0, $sp, 0 +; LA32-NEXT: ld.w $a1, $sp, 0 +; LA32-NEXT: ld.w $a2, $sp, 4 +; LA32-NEXT: move $a3, $zero +; LA32-NEXT: bl %plt(__atomic_store_8) +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: store_unordered_double: +; LA64: # %bb.0: +; LA64-NEXT: movfr2gr.d $a1, $fa0 +; LA64-NEXT: st.d $a1, $a0, 0 +; LA64-NEXT: ret + store atomic double %v, ptr %ptr unordered, align 8 + ret void +} + define void @store_monotonic_i8(ptr %ptr, i8 signext %v) { ; LA32-LABEL: store_monotonic_i8: ; LA32: # %bb.0: @@ -565,6 +818,47 @@ define void @store_monotonic_ptr(ptr %ptr, ptr %v) { ret void } +define void @store_monotonic_float(ptr %ptr, float %v) { +; LA32-LABEL: store_monotonic_float: +; LA32: # %bb.0: +; LA32-NEXT: movfr2gr.s $a1, $fa0 +; LA32-NEXT: st.w $a1, $a0, 0 +; LA32-NEXT: ret +; +; LA64-LABEL: store_monotonic_float: +; LA64: # %bb.0: +; LA64-NEXT: movfr2gr.s $a1, $fa0 +; LA64-NEXT: st.w $a1, $a0, 0 +; LA64-NEXT: ret + store atomic float %v, ptr %ptr monotonic, align 8 + ret void +} + +define void @store_monotonic_double(ptr %ptr, double %v) { +; LA32-LABEL: store_monotonic_double: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: .cfi_def_cfa_offset 16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: .cfi_offset 1, -4 +; LA32-NEXT: fst.d $fa0, $sp, 0 +; LA32-NEXT: ld.w $a1, $sp, 0 +; LA32-NEXT: ld.w $a2, $sp, 4 +; LA32-NEXT: move $a3, $zero +; LA32-NEXT: bl %plt(__atomic_store_8) +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: store_monotonic_double: +; LA64: # %bb.0: +; LA64-NEXT: movfr2gr.d $a1, $fa0 +; LA64-NEXT: st.d $a1, $a0, 0 +; LA64-NEXT: ret + store atomic double %v, ptr %ptr monotonic, align 8 + ret void +} + define void @store_seq_cst_i8(ptr %ptr, i8 signext %v) { ; LA32-LABEL: store_seq_cst_i8: ; LA32: # %bb.0: @@ -653,3 +947,46 @@ define void @store_seq_cst_ptr(ptr %ptr, ptr %v) { store atomic ptr %v, ptr %ptr seq_cst, align 8 ret void } + +define void @store_seq_cst_float(ptr %ptr, float %v) { +; LA32-LABEL: store_seq_cst_float: +; LA32: # %bb.0: +; LA32-NEXT: movfr2gr.s $a1, $fa0 +; LA32-NEXT: dbar 16 +; LA32-NEXT: st.w $a1, $a0, 0 +; LA32-NEXT: dbar 16 +; LA32-NEXT: ret +; +; LA64-LABEL: store_seq_cst_float: +; LA64: # %bb.0: +; LA64-NEXT: movfr2gr.s $a1, $fa0 +; LA64-NEXT: amswap_db.w $zero, $a1, $a0 +; LA64-NEXT: ret + store atomic float %v, ptr %ptr seq_cst, align 8 + ret void +} + +define void @store_seq_cst_double(ptr %ptr, double %v) { +; 
LA32-LABEL: store_seq_cst_double:
+; LA32:       # %bb.0:
+; LA32-NEXT:    addi.w $sp, $sp, -16
+; LA32-NEXT:    .cfi_def_cfa_offset 16
+; LA32-NEXT:    st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NEXT:    .cfi_offset 1, -4
+; LA32-NEXT:    fst.d $fa0, $sp, 0
+; LA32-NEXT:    ld.w $a1, $sp, 0
+; LA32-NEXT:    ld.w $a2, $sp, 4
+; LA32-NEXT:    ori $a3, $zero, 5
+; LA32-NEXT:    bl %plt(__atomic_store_8)
+; LA32-NEXT:    ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NEXT:    addi.w $sp, $sp, 16
+; LA32-NEXT:    ret
+;
+; LA64-LABEL: store_seq_cst_double:
+; LA64:       # %bb.0:
+; LA64-NEXT:    movfr2gr.d $a1, $fa0
+; LA64-NEXT:    amswap_db.d $zero, $a1, $a0
+; LA64-NEXT:    ret
+  store atomic double %v, ptr %ptr seq_cst, align 8
+  ret void
+}

From 4f90e75bdc156d2630da525eb74d00611753c706 Mon Sep 17 00:00:00 2001
From: Stanislav Mekhanoshin
Date: Wed, 25 Sep 2024 00:41:49 -0700
Subject: [PATCH 114/212] [AMDGPU] Do not count implicit VGPRs in
 SIInsertWaitcnts (#109049)

When generating waitcounts before a use or def, skip implicit VGPR operands
on memory instructions. We never have real implicit VGPR operands on memory
instructions; they are present only for super-register liveness accounting.
Some other instructions (MOVRELS, for example) may have real implicit VGPR
uses, though. This is less than ideal, but most of the problems observed are
with spills.
---
 llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp   |   8 +
 .../GlobalISel/image-waterfall-loop-O0.ll     |   1 -
 llvm/test/CodeGen/AMDGPU/collapse-endcf.ll    |  23 +-
 llvm/test/CodeGen/AMDGPU/div_i128.ll          |  96 ++++----
 .../CodeGen/AMDGPU/indirect-addressing-si.ll  |  57 +++--
 llvm/test/CodeGen/AMDGPU/load-global-i16.ll   |  11 +-
 llvm/test/CodeGen/AMDGPU/load-global-i32.ll   |   1 -
 ...uf-legalize-operands-non-ptr-intrinsics.ll |   1 -
 .../CodeGen/AMDGPU/mubuf-legalize-operands.ll |   1 -
 llvm/test/CodeGen/AMDGPU/rem_i128.ll          |  48 ++--
 llvm/test/CodeGen/AMDGPU/spill-wait.mir       | 211 +++++++++++++++---
 llvm/test/CodeGen/AMDGPU/waitcnt-agpr.mir     |   2 +-
 llvm/test/CodeGen/AMDGPU/waitcnt.mir          |   2 +-
 13 files changed, 332 insertions(+), 130 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
index a566827260138..80a7529002ac9 100644
--- a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
@@ -1752,6 +1752,14 @@ bool SIInsertWaitcnts::generateWaitcntInstBefore(MachineInstr &MI,
       const bool IsVGPR = TRI->isVectorRegister(*MRI, Op.getReg());
       for (int RegNo = Interval.first; RegNo < Interval.second; ++RegNo) {
         if (IsVGPR) {
+          // Implicit VGPR defs and uses are never a part of the memory
+          // instruction's description and are usually present to account for
+          // super-register liveness.
+          // TODO: Most of the other instructions also have implicit uses
+          // for the liveness accounting only.
+          if (Op.isImplicit() && MI.mayLoadOrStore())
+            continue;
+
           // RAW always needs an s_waitcnt.
WAW needs an s_waitcnt unless the // previous write and this write are the same type of VMEM // instruction, in which case they are (in some architectures) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/image-waterfall-loop-O0.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/image-waterfall-loop-O0.ll index 10cbc56cc5fbe..de973481f8230 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/image-waterfall-loop-O0.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/image-waterfall-loop-O0.ll @@ -187,7 +187,6 @@ define <4 x float> @waterfall_loop(<8 x i32> %vgpr_srd) { ; CHECK-NEXT: v_mov_b32_e32 v3, s4 ; CHECK-NEXT: ; kill: killed $vgpr4 ; CHECK-NEXT: s_xor_saveexec_b32 s4, -1 -; CHECK-NEXT: s_waitcnt vmcnt(0) ; CHECK-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:80 ; 4-byte Folded Reload ; CHECK-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:84 ; 4-byte Folded Reload ; CHECK-NEXT: s_mov_b32 exec_lo, s4 diff --git a/llvm/test/CodeGen/AMDGPU/collapse-endcf.ll b/llvm/test/CodeGen/AMDGPU/collapse-endcf.ll index 51f9cf73488ee..67a084068941a 100644 --- a/llvm/test/CodeGen/AMDGPU/collapse-endcf.ll +++ b/llvm/test/CodeGen/AMDGPU/collapse-endcf.ll @@ -778,8 +778,8 @@ define amdgpu_kernel void @nested_if_else_if(ptr addrspace(1) nocapture %arg) { ; GCN-O0-NEXT: s_mov_b64 exec, s[0:1] ; GCN-O0-NEXT: s_cbranch_execz .LBB3_7 ; GCN-O0-NEXT: ; %bb.3: ; %bb.inner.then -; GCN-O0-NEXT: s_waitcnt expcnt(1) ; GCN-O0-NEXT: buffer_load_dword v1, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload +; GCN-O0-NEXT: s_waitcnt expcnt(1) ; GCN-O0-NEXT: buffer_load_dword v2, off, s[12:15], 0 offset:8 ; 4-byte Folded Reload ; GCN-O0-NEXT: s_mov_b32 s0, 0xf000 ; GCN-O0-NEXT: s_mov_b32 s2, 0 @@ -824,8 +824,8 @@ define amdgpu_kernel void @nested_if_else_if(ptr addrspace(1) nocapture %arg) { ; GCN-O0-NEXT: s_mov_b64 exec, s[0:1] ; GCN-O0-NEXT: s_cbranch_execz .LBB3_6 ; GCN-O0-NEXT: ; %bb.5: ; %bb.inner.then2 -; GCN-O0-NEXT: s_waitcnt expcnt(1) ; GCN-O0-NEXT: buffer_load_dword v1, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload +; GCN-O0-NEXT: s_waitcnt expcnt(1) ; GCN-O0-NEXT: buffer_load_dword v2, off, s[12:15], 0 offset:8 ; 4-byte Folded Reload ; GCN-O0-NEXT: s_mov_b32 s0, 0xf000 ; GCN-O0-NEXT: s_mov_b32 s2, 0 @@ -1242,10 +1242,13 @@ define void @scc_liveness(i32 %arg) local_unnamed_addr #0 { ; GCN-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload ; GCN-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload ; GCN-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_waitcnt vmcnt(0) +; GCN-O0-NEXT: s_waitcnt vmcnt(3) ; GCN-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill +; GCN-O0-NEXT: s_waitcnt vmcnt(3) ; GCN-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill +; GCN-O0-NEXT: s_waitcnt vmcnt(3) ; GCN-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill +; GCN-O0-NEXT: s_waitcnt vmcnt(3) ; GCN-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill ; GCN-O0-NEXT: s_branch .LBB5_7 ; GCN-O0-NEXT: .LBB5_6: ; %Flow @@ -1263,10 +1266,13 @@ define void @scc_liveness(i32 %arg) local_unnamed_addr #0 { ; GCN-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload ; GCN-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload ; GCN-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_waitcnt vmcnt(0) +; GCN-O0-NEXT: s_waitcnt vmcnt(3) 
; GCN-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; GCN-O0-NEXT: s_waitcnt vmcnt(3) ; GCN-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; GCN-O0-NEXT: s_waitcnt vmcnt(3) ; GCN-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; GCN-O0-NEXT: s_waitcnt vmcnt(3) ; GCN-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill ; GCN-O0-NEXT: s_branch .LBB5_5 ; GCN-O0-NEXT: .LBB5_7: ; %bb10 @@ -1336,10 +1342,13 @@ define void @scc_liveness(i32 %arg) local_unnamed_addr #0 { ; GCN-O0-NEXT: s_or_saveexec_b64 s[14:15], -1 ; GCN-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 ; 4-byte Folded Spill ; GCN-O0-NEXT: s_mov_b64 exec, s[14:15] -; GCN-O0-NEXT: s_waitcnt vmcnt(1) +; GCN-O0-NEXT: s_waitcnt vmcnt(4) ; GCN-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill +; GCN-O0-NEXT: s_waitcnt vmcnt(4) ; GCN-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill +; GCN-O0-NEXT: s_waitcnt vmcnt(4) ; GCN-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:64 ; 4-byte Folded Spill +; GCN-O0-NEXT: s_waitcnt vmcnt(4) ; GCN-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:68 ; 4-byte Folded Spill ; GCN-O0-NEXT: s_andn2_b64 exec, exec, s[4:5] ; GCN-O0-NEXT: s_cbranch_execnz .LBB5_1 @@ -1356,9 +1365,11 @@ define void @scc_liveness(i32 %arg) local_unnamed_addr #0 { ; GCN-O0-NEXT: s_or_saveexec_b64 s[14:15], -1 ; GCN-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload ; GCN-O0-NEXT: s_mov_b64 exec, s[14:15] -; GCN-O0-NEXT: s_waitcnt expcnt(0) +; GCN-O0-NEXT: s_waitcnt expcnt(2) ; GCN-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload +; GCN-O0-NEXT: s_waitcnt expcnt(1) ; GCN-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload +; GCN-O0-NEXT: s_waitcnt expcnt(0) ; GCN-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload ; GCN-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:68 ; 4-byte Folded Reload ; GCN-O0-NEXT: s_waitcnt vmcnt(0) diff --git a/llvm/test/CodeGen/AMDGPU/div_i128.ll b/llvm/test/CodeGen/AMDGPU/div_i128.ll index 712cecff40617..b541be9f5aa44 100644 --- a/llvm/test/CodeGen/AMDGPU/div_i128.ll +++ b/llvm/test/CodeGen/AMDGPU/div_i128.ll @@ -570,21 +570,21 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:140 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:144 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:148 ; 4-byte Folded Reload -; GFX9-O0-NEXT: s_waitcnt vmcnt(6) +; GFX9-O0-NEXT: s_waitcnt vmcnt(7) ; GFX9-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:112 ; 4-byte Folded Spill -; GFX9-O0-NEXT: s_nop 0 +; GFX9-O0-NEXT: s_waitcnt vmcnt(7) ; GFX9-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:116 ; 4-byte Folded Spill -; GFX9-O0-NEXT: s_waitcnt vmcnt(6) +; GFX9-O0-NEXT: s_waitcnt vmcnt(7) ; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:104 ; 4-byte Folded Spill -; GFX9-O0-NEXT: s_nop 0 +; GFX9-O0-NEXT: s_waitcnt vmcnt(7) ; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:108 ; 4-byte Folded Spill -; GFX9-O0-NEXT: s_waitcnt vmcnt(6) +; GFX9-O0-NEXT: s_waitcnt vmcnt(7) ; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:96 ; 4-byte Folded Spill -; GFX9-O0-NEXT: s_nop 0 +; GFX9-O0-NEXT: s_waitcnt vmcnt(7) ; 
GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:100 ; 4-byte Folded Spill -; GFX9-O0-NEXT: s_waitcnt vmcnt(6) +; GFX9-O0-NEXT: s_waitcnt vmcnt(7) ; GFX9-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:88 ; 4-byte Folded Spill -; GFX9-O0-NEXT: s_nop 0 +; GFX9-O0-NEXT: s_waitcnt vmcnt(7) ; GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:92 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_branch .LBB0_5 ; GFX9-O0-NEXT: .LBB0_3: ; %Flow2 @@ -599,9 +599,9 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload -; GFX9-O0-NEXT: s_waitcnt vmcnt(0) +; GFX9-O0-NEXT: s_waitcnt vmcnt(1) ; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:160 ; 4-byte Folded Spill -; GFX9-O0-NEXT: s_nop 0 +; GFX9-O0-NEXT: s_waitcnt vmcnt(1) ; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:164 ; 4-byte Folded Spill ; GFX9-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:152 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_nop 0 @@ -663,9 +663,9 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:100 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:88 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:92 ; 4-byte Folded Reload -; GFX9-O0-NEXT: s_waitcnt vmcnt(0) +; GFX9-O0-NEXT: s_waitcnt vmcnt(1) ; GFX9-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:176 ; 4-byte Folded Spill -; GFX9-O0-NEXT: s_nop 0 +; GFX9-O0-NEXT: s_waitcnt vmcnt(1) ; GFX9-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:180 ; 4-byte Folded Spill ; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:168 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_nop 0 @@ -1718,17 +1718,21 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-G-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:152 ; 4-byte Folded Reload ; GFX9-G-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:156 ; 4-byte Folded Reload ; GFX9-G-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:160 ; 4-byte Folded Reload -; GFX9-G-O0-NEXT: s_waitcnt vmcnt(4) +; GFX9-G-O0-NEXT: s_waitcnt vmcnt(7) ; GFX9-G-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:116 ; 4-byte Folded Spill -; GFX9-G-O0-NEXT: s_nop 0 +; GFX9-G-O0-NEXT: s_waitcnt vmcnt(7) ; GFX9-G-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:120 ; 4-byte Folded Spill +; GFX9-G-O0-NEXT: s_waitcnt vmcnt(7) ; GFX9-G-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:124 ; 4-byte Folded Spill +; GFX9-G-O0-NEXT: s_waitcnt vmcnt(7) ; GFX9-G-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:128 ; 4-byte Folded Spill -; GFX9-G-O0-NEXT: s_waitcnt vmcnt(4) +; GFX9-G-O0-NEXT: s_waitcnt vmcnt(7) ; GFX9-G-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:100 ; 4-byte Folded Spill -; GFX9-G-O0-NEXT: s_nop 0 +; GFX9-G-O0-NEXT: s_waitcnt vmcnt(7) ; GFX9-G-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:104 ; 4-byte Folded Spill +; GFX9-G-O0-NEXT: s_waitcnt vmcnt(7) ; GFX9-G-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:108 ; 4-byte Folded Spill +; GFX9-G-O0-NEXT: s_waitcnt vmcnt(7) ; GFX9-G-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:112 ; 4-byte Folded Spill ; GFX9-G-O0-NEXT: s_branch .LBB0_5 ; GFX9-G-O0-NEXT: .LBB0_3: 
; %Flow2 @@ -1743,11 +1747,13 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-G-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload ; GFX9-G-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload ; GFX9-G-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload -; GFX9-G-O0-NEXT: s_waitcnt vmcnt(0) +; GFX9-G-O0-NEXT: s_waitcnt vmcnt(3) ; GFX9-G-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:164 ; 4-byte Folded Spill -; GFX9-G-O0-NEXT: s_nop 0 +; GFX9-G-O0-NEXT: s_waitcnt vmcnt(3) ; GFX9-G-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:168 ; 4-byte Folded Spill +; GFX9-G-O0-NEXT: s_waitcnt vmcnt(3) ; GFX9-G-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:172 ; 4-byte Folded Spill +; GFX9-G-O0-NEXT: s_waitcnt vmcnt(3) ; GFX9-G-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:176 ; 4-byte Folded Spill ; GFX9-G-O0-NEXT: s_branch .LBB0_9 ; GFX9-G-O0-NEXT: .LBB0_4: ; %udiv-loop-exit @@ -1822,11 +1828,13 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-G-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:104 ; 4-byte Folded Reload ; GFX9-G-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:108 ; 4-byte Folded Reload ; GFX9-G-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:112 ; 4-byte Folded Reload -; GFX9-G-O0-NEXT: s_waitcnt vmcnt(0) +; GFX9-G-O0-NEXT: s_waitcnt vmcnt(3) ; GFX9-G-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:180 ; 4-byte Folded Spill -; GFX9-G-O0-NEXT: s_nop 0 +; GFX9-G-O0-NEXT: s_waitcnt vmcnt(3) ; GFX9-G-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:184 ; 4-byte Folded Spill +; GFX9-G-O0-NEXT: s_waitcnt vmcnt(3) ; GFX9-G-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:188 ; 4-byte Folded Spill +; GFX9-G-O0-NEXT: s_waitcnt vmcnt(3) ; GFX9-G-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:192 ; 4-byte Folded Spill ; GFX9-G-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:196 ; 4-byte Folded Spill ; GFX9-G-O0-NEXT: s_nop 0 @@ -2787,21 +2795,21 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:124 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:128 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:132 ; 4-byte Folded Reload -; GFX9-O0-NEXT: s_waitcnt vmcnt(6) +; GFX9-O0-NEXT: s_waitcnt vmcnt(7) ; GFX9-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:96 ; 4-byte Folded Spill -; GFX9-O0-NEXT: s_nop 0 +; GFX9-O0-NEXT: s_waitcnt vmcnt(7) ; GFX9-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:100 ; 4-byte Folded Spill -; GFX9-O0-NEXT: s_waitcnt vmcnt(6) +; GFX9-O0-NEXT: s_waitcnt vmcnt(7) ; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:88 ; 4-byte Folded Spill -; GFX9-O0-NEXT: s_nop 0 +; GFX9-O0-NEXT: s_waitcnt vmcnt(7) ; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:92 ; 4-byte Folded Spill -; GFX9-O0-NEXT: s_waitcnt vmcnt(6) +; GFX9-O0-NEXT: s_waitcnt vmcnt(7) ; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:80 ; 4-byte Folded Spill -; GFX9-O0-NEXT: s_nop 0 +; GFX9-O0-NEXT: s_waitcnt vmcnt(7) ; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:84 ; 4-byte Folded Spill -; GFX9-O0-NEXT: s_waitcnt vmcnt(6) +; GFX9-O0-NEXT: s_waitcnt vmcnt(7) ; GFX9-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:72 ; 4-byte Folded Spill -; GFX9-O0-NEXT: s_nop 0 +; GFX9-O0-NEXT: s_waitcnt vmcnt(7) 
; GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_branch .LBB1_5 ; GFX9-O0-NEXT: .LBB1_3: ; %Flow2 @@ -2816,9 +2824,9 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload -; GFX9-O0-NEXT: s_waitcnt vmcnt(0) +; GFX9-O0-NEXT: s_waitcnt vmcnt(1) ; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:144 ; 4-byte Folded Spill -; GFX9-O0-NEXT: s_nop 0 +; GFX9-O0-NEXT: s_waitcnt vmcnt(1) ; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:148 ; 4-byte Folded Spill ; GFX9-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:136 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_nop 0 @@ -2880,9 +2888,9 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:84 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:72 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:76 ; 4-byte Folded Reload -; GFX9-O0-NEXT: s_waitcnt vmcnt(0) +; GFX9-O0-NEXT: s_waitcnt vmcnt(1) ; GFX9-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:160 ; 4-byte Folded Spill -; GFX9-O0-NEXT: s_nop 0 +; GFX9-O0-NEXT: s_waitcnt vmcnt(1) ; GFX9-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:164 ; 4-byte Folded Spill ; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:152 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_nop 0 @@ -3846,17 +3854,21 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-G-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:124 ; 4-byte Folded Reload ; GFX9-G-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:128 ; 4-byte Folded Reload ; GFX9-G-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:132 ; 4-byte Folded Reload -; GFX9-G-O0-NEXT: s_waitcnt vmcnt(4) +; GFX9-G-O0-NEXT: s_waitcnt vmcnt(7) ; GFX9-G-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:88 ; 4-byte Folded Spill -; GFX9-G-O0-NEXT: s_nop 0 +; GFX9-G-O0-NEXT: s_waitcnt vmcnt(7) ; GFX9-G-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:92 ; 4-byte Folded Spill +; GFX9-G-O0-NEXT: s_waitcnt vmcnt(7) ; GFX9-G-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:96 ; 4-byte Folded Spill +; GFX9-G-O0-NEXT: s_waitcnt vmcnt(7) ; GFX9-G-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:100 ; 4-byte Folded Spill -; GFX9-G-O0-NEXT: s_waitcnt vmcnt(4) +; GFX9-G-O0-NEXT: s_waitcnt vmcnt(7) ; GFX9-G-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:72 ; 4-byte Folded Spill -; GFX9-G-O0-NEXT: s_nop 0 +; GFX9-G-O0-NEXT: s_waitcnt vmcnt(7) ; GFX9-G-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill +; GFX9-G-O0-NEXT: s_waitcnt vmcnt(7) ; GFX9-G-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:80 ; 4-byte Folded Spill +; GFX9-G-O0-NEXT: s_waitcnt vmcnt(7) ; GFX9-G-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:84 ; 4-byte Folded Spill ; GFX9-G-O0-NEXT: s_branch .LBB1_5 ; GFX9-G-O0-NEXT: .LBB1_3: ; %Flow2 @@ -3871,11 +3883,13 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-G-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload ; GFX9-G-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload ; GFX9-G-O0-NEXT: buffer_load_dword v3, off, 
s[0:3], s32 offset:16 ; 4-byte Folded Reload -; GFX9-G-O0-NEXT: s_waitcnt vmcnt(0) +; GFX9-G-O0-NEXT: s_waitcnt vmcnt(3) ; GFX9-G-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:136 ; 4-byte Folded Spill -; GFX9-G-O0-NEXT: s_nop 0 +; GFX9-G-O0-NEXT: s_waitcnt vmcnt(3) ; GFX9-G-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:140 ; 4-byte Folded Spill +; GFX9-G-O0-NEXT: s_waitcnt vmcnt(3) ; GFX9-G-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:144 ; 4-byte Folded Spill +; GFX9-G-O0-NEXT: s_waitcnt vmcnt(3) ; GFX9-G-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:148 ; 4-byte Folded Spill ; GFX9-G-O0-NEXT: s_branch .LBB1_9 ; GFX9-G-O0-NEXT: .LBB1_4: ; %udiv-loop-exit @@ -3950,11 +3964,13 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-G-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:76 ; 4-byte Folded Reload ; GFX9-G-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:80 ; 4-byte Folded Reload ; GFX9-G-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:84 ; 4-byte Folded Reload -; GFX9-G-O0-NEXT: s_waitcnt vmcnt(0) +; GFX9-G-O0-NEXT: s_waitcnt vmcnt(3) ; GFX9-G-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:152 ; 4-byte Folded Spill -; GFX9-G-O0-NEXT: s_nop 0 +; GFX9-G-O0-NEXT: s_waitcnt vmcnt(3) ; GFX9-G-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:156 ; 4-byte Folded Spill +; GFX9-G-O0-NEXT: s_waitcnt vmcnt(3) ; GFX9-G-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:160 ; 4-byte Folded Spill +; GFX9-G-O0-NEXT: s_waitcnt vmcnt(3) ; GFX9-G-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:164 ; 4-byte Folded Spill ; GFX9-G-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:168 ; 4-byte Folded Spill ; GFX9-G-O0-NEXT: s_nop 0 diff --git a/llvm/test/CodeGen/AMDGPU/indirect-addressing-si.ll b/llvm/test/CodeGen/AMDGPU/indirect-addressing-si.ll index b4fe112438b4f..60946956547a7 100644 --- a/llvm/test/CodeGen/AMDGPU/indirect-addressing-si.ll +++ b/llvm/test/CodeGen/AMDGPU/indirect-addressing-si.ll @@ -4111,7 +4111,6 @@ define amdgpu_kernel void @insert_neg_offset_vgpr(ptr addrspace(1) %in, ptr addr ; NOOPT-NEXT: s_or_saveexec_b64 s[16:17], -1 ; NOOPT-NEXT: buffer_store_dword v16, off, s[20:23], 0 offset:64 ; 4-byte Folded Spill ; NOOPT-NEXT: s_mov_b64 exec, s[16:17] -; NOOPT-NEXT: s_waitcnt expcnt(2) ; NOOPT-NEXT: buffer_store_dword v0, off, s[20:23], 0 ; 4-byte Folded Spill ; NOOPT-NEXT: buffer_store_dword v1, off, s[20:23], 0 offset:4 ; 4-byte Folded Spill ; NOOPT-NEXT: buffer_store_dword v2, off, s[20:23], 0 offset:8 ; 4-byte Folded Spill @@ -4137,7 +4136,6 @@ define amdgpu_kernel void @insert_neg_offset_vgpr(ptr addrspace(1) %in, ptr addr ; NOOPT-NEXT: s_waitcnt vmcnt(0) ; NOOPT-NEXT: v_readlane_b32 s0, v0, 6 ; NOOPT-NEXT: v_readlane_b32 s1, v0, 7 -; NOOPT-NEXT: s_waitcnt expcnt(0) ; NOOPT-NEXT: buffer_load_dword v1, off, s[20:23], 0 ; 4-byte Folded Reload ; NOOPT-NEXT: buffer_load_dword v2, off, s[20:23], 0 offset:4 ; 4-byte Folded Reload ; NOOPT-NEXT: buffer_load_dword v3, off, s[20:23], 0 offset:8 ; 4-byte Folded Reload @@ -4146,12 +4144,19 @@ define amdgpu_kernel void @insert_neg_offset_vgpr(ptr addrspace(1) %in, ptr addr ; NOOPT-NEXT: buffer_load_dword v6, off, s[20:23], 0 offset:20 ; 4-byte Folded Reload ; NOOPT-NEXT: buffer_load_dword v7, off, s[20:23], 0 offset:24 ; 4-byte Folded Reload ; NOOPT-NEXT: buffer_load_dword v8, off, s[20:23], 0 offset:28 ; 4-byte Folded Reload +; NOOPT-NEXT: s_waitcnt expcnt(6) ; NOOPT-NEXT: buffer_load_dword v9, off, s[20:23], 0 offset:32 ; 4-byte Folded 
Reload +; NOOPT-NEXT: s_waitcnt expcnt(5) ; NOOPT-NEXT: buffer_load_dword v10, off, s[20:23], 0 offset:36 ; 4-byte Folded Reload +; NOOPT-NEXT: s_waitcnt expcnt(4) ; NOOPT-NEXT: buffer_load_dword v11, off, s[20:23], 0 offset:40 ; 4-byte Folded Reload +; NOOPT-NEXT: s_waitcnt expcnt(3) ; NOOPT-NEXT: buffer_load_dword v12, off, s[20:23], 0 offset:44 ; 4-byte Folded Reload +; NOOPT-NEXT: s_waitcnt expcnt(2) ; NOOPT-NEXT: buffer_load_dword v13, off, s[20:23], 0 offset:48 ; 4-byte Folded Reload +; NOOPT-NEXT: s_waitcnt expcnt(1) ; NOOPT-NEXT: buffer_load_dword v14, off, s[20:23], 0 offset:52 ; 4-byte Folded Reload +; NOOPT-NEXT: s_waitcnt expcnt(0) ; NOOPT-NEXT: buffer_load_dword v15, off, s[20:23], 0 offset:56 ; 4-byte Folded Reload ; NOOPT-NEXT: buffer_load_dword v16, off, s[20:23], 0 offset:60 ; 4-byte Folded Reload ; NOOPT-NEXT: buffer_load_dword v17, off, s[20:23], 0 offset:68 ; 4-byte Folded Reload @@ -4178,7 +4183,6 @@ define amdgpu_kernel void @insert_neg_offset_vgpr(ptr addrspace(1) %in, ptr addr ; NOOPT-NEXT: buffer_store_dword v14, off, s[20:23], 0 offset:192 ; 4-byte Folded Spill ; NOOPT-NEXT: buffer_store_dword v15, off, s[20:23], 0 offset:196 ; 4-byte Folded Spill ; NOOPT-NEXT: buffer_store_dword v16, off, s[20:23], 0 offset:200 ; 4-byte Folded Spill -; NOOPT-NEXT: s_waitcnt expcnt(0) ; NOOPT-NEXT: buffer_store_dword v1, off, s[20:23], 0 ; 4-byte Folded Spill ; NOOPT-NEXT: buffer_store_dword v2, off, s[20:23], 0 offset:4 ; 4-byte Folded Spill ; NOOPT-NEXT: buffer_store_dword v3, off, s[20:23], 0 offset:8 ; 4-byte Folded Spill @@ -4597,7 +4601,6 @@ define amdgpu_kernel void @insert_neg_inline_offset_vgpr(ptr addrspace(1) %in, p ; NOOPT-NEXT: s_or_saveexec_b64 s[16:17], -1 ; NOOPT-NEXT: buffer_store_dword v16, off, s[20:23], 0 offset:64 ; 4-byte Folded Spill ; NOOPT-NEXT: s_mov_b64 exec, s[16:17] -; NOOPT-NEXT: s_waitcnt expcnt(2) ; NOOPT-NEXT: buffer_store_dword v0, off, s[20:23], 0 ; 4-byte Folded Spill ; NOOPT-NEXT: buffer_store_dword v1, off, s[20:23], 0 offset:4 ; 4-byte Folded Spill ; NOOPT-NEXT: buffer_store_dword v2, off, s[20:23], 0 offset:8 ; 4-byte Folded Spill @@ -4623,7 +4626,6 @@ define amdgpu_kernel void @insert_neg_inline_offset_vgpr(ptr addrspace(1) %in, p ; NOOPT-NEXT: s_waitcnt vmcnt(0) ; NOOPT-NEXT: v_readlane_b32 s0, v0, 6 ; NOOPT-NEXT: v_readlane_b32 s1, v0, 7 -; NOOPT-NEXT: s_waitcnt expcnt(0) ; NOOPT-NEXT: buffer_load_dword v1, off, s[20:23], 0 ; 4-byte Folded Reload ; NOOPT-NEXT: buffer_load_dword v2, off, s[20:23], 0 offset:4 ; 4-byte Folded Reload ; NOOPT-NEXT: buffer_load_dword v3, off, s[20:23], 0 offset:8 ; 4-byte Folded Reload @@ -4632,12 +4634,19 @@ define amdgpu_kernel void @insert_neg_inline_offset_vgpr(ptr addrspace(1) %in, p ; NOOPT-NEXT: buffer_load_dword v6, off, s[20:23], 0 offset:20 ; 4-byte Folded Reload ; NOOPT-NEXT: buffer_load_dword v7, off, s[20:23], 0 offset:24 ; 4-byte Folded Reload ; NOOPT-NEXT: buffer_load_dword v8, off, s[20:23], 0 offset:28 ; 4-byte Folded Reload +; NOOPT-NEXT: s_waitcnt expcnt(6) ; NOOPT-NEXT: buffer_load_dword v9, off, s[20:23], 0 offset:32 ; 4-byte Folded Reload +; NOOPT-NEXT: s_waitcnt expcnt(5) ; NOOPT-NEXT: buffer_load_dword v10, off, s[20:23], 0 offset:36 ; 4-byte Folded Reload +; NOOPT-NEXT: s_waitcnt expcnt(4) ; NOOPT-NEXT: buffer_load_dword v11, off, s[20:23], 0 offset:40 ; 4-byte Folded Reload +; NOOPT-NEXT: s_waitcnt expcnt(3) ; NOOPT-NEXT: buffer_load_dword v12, off, s[20:23], 0 offset:44 ; 4-byte Folded Reload +; NOOPT-NEXT: s_waitcnt expcnt(2) ; NOOPT-NEXT: buffer_load_dword v13, off, s[20:23], 0 
offset:48 ; 4-byte Folded Reload +; NOOPT-NEXT: s_waitcnt expcnt(1) ; NOOPT-NEXT: buffer_load_dword v14, off, s[20:23], 0 offset:52 ; 4-byte Folded Reload +; NOOPT-NEXT: s_waitcnt expcnt(0) ; NOOPT-NEXT: buffer_load_dword v15, off, s[20:23], 0 offset:56 ; 4-byte Folded Reload ; NOOPT-NEXT: buffer_load_dword v16, off, s[20:23], 0 offset:60 ; 4-byte Folded Reload ; NOOPT-NEXT: buffer_load_dword v17, off, s[20:23], 0 offset:68 ; 4-byte Folded Reload @@ -4664,7 +4673,6 @@ define amdgpu_kernel void @insert_neg_inline_offset_vgpr(ptr addrspace(1) %in, p ; NOOPT-NEXT: buffer_store_dword v14, off, s[20:23], 0 offset:192 ; 4-byte Folded Spill ; NOOPT-NEXT: buffer_store_dword v15, off, s[20:23], 0 offset:196 ; 4-byte Folded Spill ; NOOPT-NEXT: buffer_store_dword v16, off, s[20:23], 0 offset:200 ; 4-byte Folded Spill -; NOOPT-NEXT: s_waitcnt expcnt(0) ; NOOPT-NEXT: buffer_store_dword v1, off, s[20:23], 0 ; 4-byte Folded Spill ; NOOPT-NEXT: buffer_store_dword v2, off, s[20:23], 0 offset:4 ; 4-byte Folded Spill ; NOOPT-NEXT: buffer_store_dword v3, off, s[20:23], 0 offset:8 ; 4-byte Folded Spill @@ -5912,7 +5920,6 @@ define amdgpu_kernel void @insert_vgpr_offset_multiple_in_block(ptr addrspace(1) ; NOOPT-NEXT: s_waitcnt vmcnt(0) ; NOOPT-NEXT: v_readlane_b32 s0, v0, 7 ; NOOPT-NEXT: v_readlane_b32 s1, v0, 8 -; NOOPT-NEXT: s_waitcnt expcnt(0) ; NOOPT-NEXT: buffer_load_dword v1, off, s[28:31], 0 ; 4-byte Folded Reload ; NOOPT-NEXT: buffer_load_dword v2, off, s[28:31], 0 offset:4 ; 4-byte Folded Reload ; NOOPT-NEXT: buffer_load_dword v3, off, s[28:31], 0 offset:8 ; 4-byte Folded Reload @@ -5921,12 +5928,19 @@ define amdgpu_kernel void @insert_vgpr_offset_multiple_in_block(ptr addrspace(1) ; NOOPT-NEXT: buffer_load_dword v6, off, s[28:31], 0 offset:20 ; 4-byte Folded Reload ; NOOPT-NEXT: buffer_load_dword v7, off, s[28:31], 0 offset:24 ; 4-byte Folded Reload ; NOOPT-NEXT: buffer_load_dword v8, off, s[28:31], 0 offset:28 ; 4-byte Folded Reload +; NOOPT-NEXT: s_waitcnt expcnt(6) ; NOOPT-NEXT: buffer_load_dword v9, off, s[28:31], 0 offset:32 ; 4-byte Folded Reload +; NOOPT-NEXT: s_waitcnt expcnt(5) ; NOOPT-NEXT: buffer_load_dword v10, off, s[28:31], 0 offset:36 ; 4-byte Folded Reload +; NOOPT-NEXT: s_waitcnt expcnt(4) ; NOOPT-NEXT: buffer_load_dword v11, off, s[28:31], 0 offset:40 ; 4-byte Folded Reload +; NOOPT-NEXT: s_waitcnt expcnt(3) ; NOOPT-NEXT: buffer_load_dword v12, off, s[28:31], 0 offset:44 ; 4-byte Folded Reload +; NOOPT-NEXT: s_waitcnt expcnt(2) ; NOOPT-NEXT: buffer_load_dword v13, off, s[28:31], 0 offset:48 ; 4-byte Folded Reload +; NOOPT-NEXT: s_waitcnt expcnt(1) ; NOOPT-NEXT: buffer_load_dword v14, off, s[28:31], 0 offset:52 ; 4-byte Folded Reload +; NOOPT-NEXT: s_waitcnt expcnt(0) ; NOOPT-NEXT: buffer_load_dword v15, off, s[28:31], 0 offset:56 ; 4-byte Folded Reload ; NOOPT-NEXT: buffer_load_dword v16, off, s[28:31], 0 offset:60 ; 4-byte Folded Reload ; NOOPT-NEXT: buffer_load_dword v17, off, s[28:31], 0 offset:72 ; 4-byte Folded Reload @@ -5953,7 +5967,6 @@ define amdgpu_kernel void @insert_vgpr_offset_multiple_in_block(ptr addrspace(1) ; NOOPT-NEXT: buffer_store_dword v14, off, s[28:31], 0 offset:140 ; 4-byte Folded Spill ; NOOPT-NEXT: buffer_store_dword v15, off, s[28:31], 0 offset:144 ; 4-byte Folded Spill ; NOOPT-NEXT: buffer_store_dword v16, off, s[28:31], 0 offset:148 ; 4-byte Folded Spill -; NOOPT-NEXT: s_waitcnt expcnt(0) ; NOOPT-NEXT: buffer_store_dword v1, off, s[28:31], 0 ; 4-byte Folded Spill ; NOOPT-NEXT: buffer_store_dword v2, off, s[28:31], 0 offset:4 ; 4-byte Folded Spill ; 
NOOPT-NEXT: buffer_store_dword v3, off, s[28:31], 0 offset:8 ; 4-byte Folded Spill @@ -6041,7 +6054,6 @@ define amdgpu_kernel void @insert_vgpr_offset_multiple_in_block(ptr addrspace(1) ; NOOPT-NEXT: s_waitcnt vmcnt(0) ; NOOPT-NEXT: v_readlane_b32 s0, v0, 11 ; NOOPT-NEXT: v_readlane_b32 s1, v0, 12 -; NOOPT-NEXT: s_waitcnt expcnt(0) ; NOOPT-NEXT: buffer_load_dword v1, off, s[28:31], 0 offset:152 ; 4-byte Folded Reload ; NOOPT-NEXT: buffer_load_dword v2, off, s[28:31], 0 offset:156 ; 4-byte Folded Reload ; NOOPT-NEXT: buffer_load_dword v3, off, s[28:31], 0 offset:160 ; 4-byte Folded Reload @@ -6050,12 +6062,19 @@ define amdgpu_kernel void @insert_vgpr_offset_multiple_in_block(ptr addrspace(1) ; NOOPT-NEXT: buffer_load_dword v6, off, s[28:31], 0 offset:172 ; 4-byte Folded Reload ; NOOPT-NEXT: buffer_load_dword v7, off, s[28:31], 0 offset:176 ; 4-byte Folded Reload ; NOOPT-NEXT: buffer_load_dword v8, off, s[28:31], 0 offset:180 ; 4-byte Folded Reload +; NOOPT-NEXT: s_waitcnt expcnt(6) ; NOOPT-NEXT: buffer_load_dword v9, off, s[28:31], 0 offset:184 ; 4-byte Folded Reload +; NOOPT-NEXT: s_waitcnt expcnt(5) ; NOOPT-NEXT: buffer_load_dword v10, off, s[28:31], 0 offset:188 ; 4-byte Folded Reload +; NOOPT-NEXT: s_waitcnt expcnt(4) ; NOOPT-NEXT: buffer_load_dword v11, off, s[28:31], 0 offset:192 ; 4-byte Folded Reload +; NOOPT-NEXT: s_waitcnt expcnt(3) ; NOOPT-NEXT: buffer_load_dword v12, off, s[28:31], 0 offset:196 ; 4-byte Folded Reload +; NOOPT-NEXT: s_waitcnt expcnt(2) ; NOOPT-NEXT: buffer_load_dword v13, off, s[28:31], 0 offset:200 ; 4-byte Folded Reload +; NOOPT-NEXT: s_waitcnt expcnt(1) ; NOOPT-NEXT: buffer_load_dword v14, off, s[28:31], 0 offset:204 ; 4-byte Folded Reload +; NOOPT-NEXT: s_waitcnt expcnt(0) ; NOOPT-NEXT: buffer_load_dword v15, off, s[28:31], 0 offset:208 ; 4-byte Folded Reload ; NOOPT-NEXT: buffer_load_dword v16, off, s[28:31], 0 offset:212 ; 4-byte Folded Reload ; NOOPT-NEXT: buffer_load_dword v17, off, s[28:31], 0 offset:216 ; 4-byte Folded Reload @@ -6082,7 +6101,6 @@ define amdgpu_kernel void @insert_vgpr_offset_multiple_in_block(ptr addrspace(1) ; NOOPT-NEXT: buffer_store_dword v14, off, s[28:31], 0 offset:272 ; 4-byte Folded Spill ; NOOPT-NEXT: buffer_store_dword v15, off, s[28:31], 0 offset:276 ; 4-byte Folded Spill ; NOOPT-NEXT: buffer_store_dword v16, off, s[28:31], 0 offset:280 ; 4-byte Folded Spill -; NOOPT-NEXT: s_waitcnt expcnt(0) ; NOOPT-NEXT: buffer_store_dword v1, off, s[28:31], 0 offset:152 ; 4-byte Folded Spill ; NOOPT-NEXT: buffer_store_dword v2, off, s[28:31], 0 offset:156 ; 4-byte Folded Spill ; NOOPT-NEXT: buffer_store_dword v3, off, s[28:31], 0 offset:160 ; 4-byte Folded Spill @@ -9175,7 +9193,6 @@ define amdgpu_kernel void @broken_phi_bb(i32 %arg, i32 %arg1) { ; NOOPT-NEXT: s_waitcnt vmcnt(0) ; NOOPT-NEXT: v_readlane_b32 s0, v0, 6 ; NOOPT-NEXT: v_readlane_b32 s1, v0, 7 -; NOOPT-NEXT: s_waitcnt expcnt(0) ; NOOPT-NEXT: buffer_load_dword v1, off, s[24:27], 0 offset:12 ; 4-byte Folded Reload ; NOOPT-NEXT: buffer_load_dword v2, off, s[24:27], 0 offset:16 ; 4-byte Folded Reload ; NOOPT-NEXT: buffer_load_dword v3, off, s[24:27], 0 offset:20 ; 4-byte Folded Reload @@ -9184,12 +9201,19 @@ define amdgpu_kernel void @broken_phi_bb(i32 %arg, i32 %arg1) { ; NOOPT-NEXT: buffer_load_dword v6, off, s[24:27], 0 offset:32 ; 4-byte Folded Reload ; NOOPT-NEXT: buffer_load_dword v7, off, s[24:27], 0 offset:36 ; 4-byte Folded Reload ; NOOPT-NEXT: buffer_load_dword v8, off, s[24:27], 0 offset:40 ; 4-byte Folded Reload +; NOOPT-NEXT: s_waitcnt expcnt(6) ; NOOPT-NEXT: 
buffer_load_dword v9, off, s[24:27], 0 offset:44 ; 4-byte Folded Reload +; NOOPT-NEXT: s_waitcnt expcnt(5) ; NOOPT-NEXT: buffer_load_dword v10, off, s[24:27], 0 offset:48 ; 4-byte Folded Reload +; NOOPT-NEXT: s_waitcnt expcnt(4) ; NOOPT-NEXT: buffer_load_dword v11, off, s[24:27], 0 offset:52 ; 4-byte Folded Reload +; NOOPT-NEXT: s_waitcnt expcnt(3) ; NOOPT-NEXT: buffer_load_dword v12, off, s[24:27], 0 offset:56 ; 4-byte Folded Reload +; NOOPT-NEXT: s_waitcnt expcnt(2) ; NOOPT-NEXT: buffer_load_dword v13, off, s[24:27], 0 offset:60 ; 4-byte Folded Reload +; NOOPT-NEXT: s_waitcnt expcnt(1) ; NOOPT-NEXT: buffer_load_dword v14, off, s[24:27], 0 offset:64 ; 4-byte Folded Reload +; NOOPT-NEXT: s_waitcnt expcnt(0) ; NOOPT-NEXT: buffer_load_dword v15, off, s[24:27], 0 offset:68 ; 4-byte Folded Reload ; NOOPT-NEXT: buffer_load_dword v16, off, s[24:27], 0 offset:72 ; 4-byte Folded Reload ; NOOPT-NEXT: buffer_load_dword v17, off, s[24:27], 0 offset:76 ; 4-byte Folded Reload @@ -9216,7 +9240,6 @@ define amdgpu_kernel void @broken_phi_bb(i32 %arg, i32 %arg1) { ; NOOPT-NEXT: buffer_store_dword v14, off, s[24:27], 0 offset:136 ; 4-byte Folded Spill ; NOOPT-NEXT: buffer_store_dword v15, off, s[24:27], 0 offset:140 ; 4-byte Folded Spill ; NOOPT-NEXT: buffer_store_dword v16, off, s[24:27], 0 offset:144 ; 4-byte Folded Spill -; NOOPT-NEXT: s_waitcnt expcnt(0) ; NOOPT-NEXT: buffer_store_dword v1, off, s[24:27], 0 offset:12 ; 4-byte Folded Spill ; NOOPT-NEXT: buffer_store_dword v2, off, s[24:27], 0 offset:16 ; 4-byte Folded Spill ; NOOPT-NEXT: buffer_store_dword v3, off, s[24:27], 0 offset:20 ; 4-byte Folded Spill @@ -9641,7 +9664,6 @@ define amdgpu_cs void @insert_or_disj_index(ptr addrspace(1) %out, ptr addrspace ; NOOPT-NEXT: s_or_saveexec_b64 s[12:13], -1 ; NOOPT-NEXT: buffer_store_dword v16, off, s[16:19], 0 offset:64 ; 4-byte Folded Spill ; NOOPT-NEXT: s_mov_b64 exec, s[12:13] -; NOOPT-NEXT: s_waitcnt expcnt(1) ; NOOPT-NEXT: buffer_store_dword v0, off, s[16:19], 0 ; 4-byte Folded Spill ; NOOPT-NEXT: buffer_store_dword v1, off, s[16:19], 0 offset:4 ; 4-byte Folded Spill ; NOOPT-NEXT: buffer_store_dword v2, off, s[16:19], 0 offset:8 ; 4-byte Folded Spill @@ -9667,7 +9689,6 @@ define amdgpu_cs void @insert_or_disj_index(ptr addrspace(1) %out, ptr addrspace ; NOOPT-NEXT: s_waitcnt vmcnt(0) ; NOOPT-NEXT: v_readlane_b32 s0, v0, 9 ; NOOPT-NEXT: v_readlane_b32 s1, v0, 10 -; NOOPT-NEXT: s_waitcnt expcnt(0) ; NOOPT-NEXT: buffer_load_dword v1, off, s[16:19], 0 ; 4-byte Folded Reload ; NOOPT-NEXT: buffer_load_dword v2, off, s[16:19], 0 offset:4 ; 4-byte Folded Reload ; NOOPT-NEXT: buffer_load_dword v3, off, s[16:19], 0 offset:8 ; 4-byte Folded Reload @@ -9676,12 +9697,19 @@ define amdgpu_cs void @insert_or_disj_index(ptr addrspace(1) %out, ptr addrspace ; NOOPT-NEXT: buffer_load_dword v6, off, s[16:19], 0 offset:20 ; 4-byte Folded Reload ; NOOPT-NEXT: buffer_load_dword v7, off, s[16:19], 0 offset:24 ; 4-byte Folded Reload ; NOOPT-NEXT: buffer_load_dword v8, off, s[16:19], 0 offset:28 ; 4-byte Folded Reload +; NOOPT-NEXT: s_waitcnt expcnt(6) ; NOOPT-NEXT: buffer_load_dword v9, off, s[16:19], 0 offset:32 ; 4-byte Folded Reload +; NOOPT-NEXT: s_waitcnt expcnt(5) ; NOOPT-NEXT: buffer_load_dword v10, off, s[16:19], 0 offset:36 ; 4-byte Folded Reload +; NOOPT-NEXT: s_waitcnt expcnt(4) ; NOOPT-NEXT: buffer_load_dword v11, off, s[16:19], 0 offset:40 ; 4-byte Folded Reload +; NOOPT-NEXT: s_waitcnt expcnt(3) ; NOOPT-NEXT: buffer_load_dword v12, off, s[16:19], 0 offset:44 ; 4-byte Folded Reload +; NOOPT-NEXT: s_waitcnt 
expcnt(2) ; NOOPT-NEXT: buffer_load_dword v13, off, s[16:19], 0 offset:48 ; 4-byte Folded Reload +; NOOPT-NEXT: s_waitcnt expcnt(1) ; NOOPT-NEXT: buffer_load_dword v14, off, s[16:19], 0 offset:52 ; 4-byte Folded Reload +; NOOPT-NEXT: s_waitcnt expcnt(0) ; NOOPT-NEXT: buffer_load_dword v15, off, s[16:19], 0 offset:56 ; 4-byte Folded Reload ; NOOPT-NEXT: buffer_load_dword v16, off, s[16:19], 0 offset:60 ; 4-byte Folded Reload ; NOOPT-NEXT: buffer_load_dword v17, off, s[16:19], 0 offset:144 ; 4-byte Folded Reload @@ -9708,7 +9736,6 @@ define amdgpu_cs void @insert_or_disj_index(ptr addrspace(1) %out, ptr addrspace ; NOOPT-NEXT: buffer_store_dword v14, off, s[16:19], 0 offset:200 ; 4-byte Folded Spill ; NOOPT-NEXT: buffer_store_dword v15, off, s[16:19], 0 offset:204 ; 4-byte Folded Spill ; NOOPT-NEXT: buffer_store_dword v16, off, s[16:19], 0 offset:208 ; 4-byte Folded Spill -; NOOPT-NEXT: s_waitcnt expcnt(0) ; NOOPT-NEXT: buffer_store_dword v1, off, s[16:19], 0 ; 4-byte Folded Spill ; NOOPT-NEXT: buffer_store_dword v2, off, s[16:19], 0 offset:4 ; 4-byte Folded Spill ; NOOPT-NEXT: buffer_store_dword v3, off, s[16:19], 0 offset:8 ; 4-byte Folded Spill diff --git a/llvm/test/CodeGen/AMDGPU/load-global-i16.ll b/llvm/test/CodeGen/AMDGPU/load-global-i16.ll index fe5427048e8cf..e0c2d00891250 100644 --- a/llvm/test/CodeGen/AMDGPU/load-global-i16.ll +++ b/llvm/test/CodeGen/AMDGPU/load-global-i16.ll @@ -3569,7 +3569,6 @@ define amdgpu_kernel void @global_zextload_v64i16_to_v64i32(ptr addrspace(1) %ou ; GCN-NOHSA-SI-NEXT: buffer_load_dword v0, off, s[12:15], 0 offset:16 ; 4-byte Folded Reload ; GCN-NOHSA-SI-NEXT: buffer_load_dword v1, off, s[12:15], 0 offset:20 ; 4-byte Folded Reload ; GCN-NOHSA-SI-NEXT: buffer_load_dword v2, off, s[12:15], 0 offset:24 ; 4-byte Folded Reload -; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0) ; GCN-NOHSA-SI-NEXT: buffer_load_dword v3, off, s[12:15], 0 offset:28 ; 4-byte Folded Reload ; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0) ; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 @@ -3577,7 +3576,6 @@ define amdgpu_kernel void @global_zextload_v64i16_to_v64i32(ptr addrspace(1) %ou ; GCN-NOHSA-SI-NEXT: buffer_load_dword v0, off, s[12:15], 0 ; 4-byte Folded Reload ; GCN-NOHSA-SI-NEXT: buffer_load_dword v1, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload ; GCN-NOHSA-SI-NEXT: buffer_load_dword v2, off, s[12:15], 0 offset:8 ; 4-byte Folded Reload -; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0) ; GCN-NOHSA-SI-NEXT: buffer_load_dword v3, off, s[12:15], 0 offset:12 ; 4-byte Folded Reload ; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0) ; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16 @@ -4382,7 +4380,6 @@ define amdgpu_kernel void @global_sextload_v64i16_to_v64i32(ptr addrspace(1) %ou ; GCN-NOHSA-SI-NEXT: buffer_load_dword v0, off, s[12:15], 0 ; 4-byte Folded Reload ; GCN-NOHSA-SI-NEXT: buffer_load_dword v1, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload ; GCN-NOHSA-SI-NEXT: buffer_load_dword v2, off, s[12:15], 0 offset:8 ; 4-byte Folded Reload -; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0) ; GCN-NOHSA-SI-NEXT: buffer_load_dword v3, off, s[12:15], 0 offset:12 ; 4-byte Folded Reload ; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0) ; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16 @@ -7350,12 +7347,15 @@ define amdgpu_kernel void @global_zextload_v32i16_to_v32i64(ptr addrspace(1) %ou ; GCN-NOHSA-SI-NEXT: buffer_store_dword v13, off, s[12:15], 0 offset:20 ; 4-byte Folded Spill ; GCN-NOHSA-SI-NEXT: buffer_store_dword v14, off, s[12:15], 0 offset:24 ; 4-byte Folded 
Spill ; GCN-NOHSA-SI-NEXT: buffer_store_dword v15, off, s[12:15], 0 offset:28 ; 4-byte Folded Spill -; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) +; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(3) ; GCN-NOHSA-SI-NEXT: buffer_load_dword v12, off, s[12:15], 0 ; 4-byte Folded Reload +; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(2) ; GCN-NOHSA-SI-NEXT: buffer_load_dword v13, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload +; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(1) ; GCN-NOHSA-SI-NEXT: buffer_load_dword v14, off, s[12:15], 0 offset:8 ; 4-byte Folded Reload -; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0) +; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) ; GCN-NOHSA-SI-NEXT: buffer_load_dword v15, off, s[12:15], 0 offset:12 ; 4-byte Folded Reload +; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(2) ; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v13, v39 ; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0) ; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v15, v39 @@ -7378,7 +7378,6 @@ define amdgpu_kernel void @global_zextload_v32i16_to_v32i64(ptr addrspace(1) %ou ; GCN-NOHSA-SI-NEXT: buffer_load_dword v0, off, s[12:15], 0 offset:16 ; 4-byte Folded Reload ; GCN-NOHSA-SI-NEXT: buffer_load_dword v1, off, s[12:15], 0 offset:20 ; 4-byte Folded Reload ; GCN-NOHSA-SI-NEXT: buffer_load_dword v2, off, s[12:15], 0 offset:24 ; 4-byte Folded Reload -; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0) ; GCN-NOHSA-SI-NEXT: buffer_load_dword v3, off, s[12:15], 0 offset:28 ; 4-byte Folded Reload ; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0) ; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32 diff --git a/llvm/test/CodeGen/AMDGPU/load-global-i32.ll b/llvm/test/CodeGen/AMDGPU/load-global-i32.ll index 5ae2b91bdb3e7..4d7f1a9663c3d 100644 --- a/llvm/test/CodeGen/AMDGPU/load-global-i32.ll +++ b/llvm/test/CodeGen/AMDGPU/load-global-i32.ll @@ -3098,7 +3098,6 @@ define amdgpu_kernel void @global_sextload_v32i32_to_v32i64(ptr addrspace(1) %ou ; SI-NOHSA-NEXT: buffer_load_dword v8, off, s[12:15], 0 ; 4-byte Folded Reload ; SI-NOHSA-NEXT: buffer_load_dword v9, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload ; SI-NOHSA-NEXT: buffer_load_dword v10, off, s[12:15], 0 offset:8 ; 4-byte Folded Reload -; SI-NOHSA-NEXT: s_waitcnt vmcnt(0) ; SI-NOHSA-NEXT: buffer_load_dword v11, off, s[12:15], 0 offset:12 ; 4-byte Folded Reload ; SI-NOHSA-NEXT: s_waitcnt vmcnt(0) ; SI-NOHSA-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:208 diff --git a/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands-non-ptr-intrinsics.ll b/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands-non-ptr-intrinsics.ll index 85d342bf303c0..c302233e748fd 100644 --- a/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands-non-ptr-intrinsics.ll +++ b/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands-non-ptr-intrinsics.ll @@ -244,7 +244,6 @@ define float @mubuf_vgpr(<4 x i32> %i, i32 %c) #0 { ; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload ; W64-O0-NEXT: ; kill: killed $vgpr1 ; W64-O0-NEXT: s_xor_saveexec_b64 s[4:5], -1 -; W64-O0-NEXT: s_waitcnt vmcnt(0) ; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload ; W64-O0-NEXT: s_nop 0 ; W64-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload diff --git a/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands.ll b/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands.ll index 42ed4c1f2e63d..dd6fd5aa384f6 100644 --- a/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands.ll +++ b/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands.ll @@ -258,7 +258,6 @@ define float @mubuf_vgpr(ptr addrspace(8) %i, i32 %c) #0 { ; W64-O0-NEXT: 
buffer_load_dword v0, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload ; W64-O0-NEXT: ; kill: killed $vgpr1 ; W64-O0-NEXT: s_xor_saveexec_b64 s[4:5], -1 -; W64-O0-NEXT: s_waitcnt vmcnt(0) ; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload ; W64-O0-NEXT: s_nop 0 ; W64-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload diff --git a/llvm/test/CodeGen/AMDGPU/rem_i128.ll b/llvm/test/CodeGen/AMDGPU/rem_i128.ll index fd6e06afc67da..19cc60963e900 100644 --- a/llvm/test/CodeGen/AMDGPU/rem_i128.ll +++ b/llvm/test/CodeGen/AMDGPU/rem_i128.ll @@ -609,21 +609,21 @@ define i128 @v_srem_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:168 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:172 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:176 ; 4-byte Folded Reload -; GFX9-O0-NEXT: s_waitcnt vmcnt(6) +; GFX9-O0-NEXT: s_waitcnt vmcnt(7) ; GFX9-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:140 ; 4-byte Folded Spill -; GFX9-O0-NEXT: s_nop 0 +; GFX9-O0-NEXT: s_waitcnt vmcnt(7) ; GFX9-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:144 ; 4-byte Folded Spill -; GFX9-O0-NEXT: s_waitcnt vmcnt(6) +; GFX9-O0-NEXT: s_waitcnt vmcnt(7) ; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:132 ; 4-byte Folded Spill -; GFX9-O0-NEXT: s_nop 0 +; GFX9-O0-NEXT: s_waitcnt vmcnt(7) ; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:136 ; 4-byte Folded Spill -; GFX9-O0-NEXT: s_waitcnt vmcnt(6) +; GFX9-O0-NEXT: s_waitcnt vmcnt(7) ; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:124 ; 4-byte Folded Spill -; GFX9-O0-NEXT: s_nop 0 +; GFX9-O0-NEXT: s_waitcnt vmcnt(7) ; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:128 ; 4-byte Folded Spill -; GFX9-O0-NEXT: s_waitcnt vmcnt(6) +; GFX9-O0-NEXT: s_waitcnt vmcnt(7) ; GFX9-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:116 ; 4-byte Folded Spill -; GFX9-O0-NEXT: s_nop 0 +; GFX9-O0-NEXT: s_waitcnt vmcnt(7) ; GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:120 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_branch .LBB0_5 ; GFX9-O0-NEXT: .LBB0_3: ; %Flow2 @@ -638,9 +638,9 @@ define i128 @v_srem_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload -; GFX9-O0-NEXT: s_waitcnt vmcnt(0) +; GFX9-O0-NEXT: s_waitcnt vmcnt(1) ; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:188 ; 4-byte Folded Spill -; GFX9-O0-NEXT: s_nop 0 +; GFX9-O0-NEXT: s_waitcnt vmcnt(1) ; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:192 ; 4-byte Folded Spill ; GFX9-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:180 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_nop 0 @@ -702,9 +702,9 @@ define i128 @v_srem_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:128 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:116 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:120 ; 4-byte Folded Reload -; GFX9-O0-NEXT: s_waitcnt vmcnt(0) +; GFX9-O0-NEXT: s_waitcnt vmcnt(1) ; GFX9-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:204 ; 4-byte Folded Spill -; GFX9-O0-NEXT: s_nop 0 +; GFX9-O0-NEXT: 
s_waitcnt vmcnt(1) ; GFX9-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:208 ; 4-byte Folded Spill ; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:196 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_nop 0 @@ -2007,21 +2007,21 @@ define i128 @v_urem_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:156 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:160 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:164 ; 4-byte Folded Reload -; GFX9-O0-NEXT: s_waitcnt vmcnt(6) +; GFX9-O0-NEXT: s_waitcnt vmcnt(7) ; GFX9-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:128 ; 4-byte Folded Spill -; GFX9-O0-NEXT: s_nop 0 +; GFX9-O0-NEXT: s_waitcnt vmcnt(7) ; GFX9-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:132 ; 4-byte Folded Spill -; GFX9-O0-NEXT: s_waitcnt vmcnt(6) +; GFX9-O0-NEXT: s_waitcnt vmcnt(7) ; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:120 ; 4-byte Folded Spill -; GFX9-O0-NEXT: s_nop 0 +; GFX9-O0-NEXT: s_waitcnt vmcnt(7) ; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:124 ; 4-byte Folded Spill -; GFX9-O0-NEXT: s_waitcnt vmcnt(6) +; GFX9-O0-NEXT: s_waitcnt vmcnt(7) ; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:112 ; 4-byte Folded Spill -; GFX9-O0-NEXT: s_nop 0 +; GFX9-O0-NEXT: s_waitcnt vmcnt(7) ; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:116 ; 4-byte Folded Spill -; GFX9-O0-NEXT: s_waitcnt vmcnt(6) +; GFX9-O0-NEXT: s_waitcnt vmcnt(7) ; GFX9-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:104 ; 4-byte Folded Spill -; GFX9-O0-NEXT: s_nop 0 +; GFX9-O0-NEXT: s_waitcnt vmcnt(7) ; GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:108 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_branch .LBB1_5 ; GFX9-O0-NEXT: .LBB1_3: ; %Flow2 @@ -2036,9 +2036,9 @@ define i128 @v_urem_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload -; GFX9-O0-NEXT: s_waitcnt vmcnt(0) +; GFX9-O0-NEXT: s_waitcnt vmcnt(1) ; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:176 ; 4-byte Folded Spill -; GFX9-O0-NEXT: s_nop 0 +; GFX9-O0-NEXT: s_waitcnt vmcnt(1) ; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:180 ; 4-byte Folded Spill ; GFX9-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:168 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_nop 0 @@ -2100,9 +2100,9 @@ define i128 @v_urem_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:116 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:104 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:108 ; 4-byte Folded Reload -; GFX9-O0-NEXT: s_waitcnt vmcnt(0) +; GFX9-O0-NEXT: s_waitcnt vmcnt(1) ; GFX9-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:192 ; 4-byte Folded Spill -; GFX9-O0-NEXT: s_nop 0 +; GFX9-O0-NEXT: s_waitcnt vmcnt(1) ; GFX9-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:196 ; 4-byte Folded Spill ; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:184 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_nop 0 diff --git a/llvm/test/CodeGen/AMDGPU/spill-wait.mir b/llvm/test/CodeGen/AMDGPU/spill-wait.mir index 8e896252af89b..6983a2742a41c 100644 --- 
a/llvm/test/CodeGen/AMDGPU/spill-wait.mir +++ b/llvm/test/CodeGen/AMDGPU/spill-wait.mir @@ -1,5 +1,6 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5 -# RUN: llc -mtriple=amdgcn -mcpu=gfx90a -run-pass si-insert-waitcnts %s -o - | FileCheck -check-prefix=GCN %s +# RUN: llc -mtriple=amdgcn -mcpu=gfx90a -run-pass si-insert-waitcnts %s -o - | FileCheck -check-prefix=GFX9 %s +# RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -run-pass si-insert-waitcnts %s -o - | FileCheck -check-prefix=GFX12 %s # There shall be no S_WAITCNT between two stores. @@ -10,14 +11,27 @@ body: | bb.0: liveins: $vgpr0_vgpr1, $sgpr76_sgpr77_sgpr78_sgpr79 - ; GCN-LABEL: name: spill_vgpr_tuple - ; GCN: liveins: $vgpr0_vgpr1, $sgpr76_sgpr77_sgpr78_sgpr79 - ; GCN-NEXT: {{ $}} - ; GCN-NEXT: S_WAITCNT 0 - ; GCN-NEXT: $vgpr64_vgpr65 = V_MOV_B64_e32 $vgpr0_vgpr1, implicit $exec - ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr64, $sgpr76_sgpr77_sgpr78_sgpr79, 0, 672, 0, 0, implicit $exec, implicit-def $vgpr64_vgpr65, implicit $vgpr64_vgpr65 - ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr65, $sgpr76_sgpr77_sgpr78_sgpr79, 0, 676, 0, 0, implicit $exec, implicit $vgpr64_vgpr65 - ; GCN-NEXT: S_ENDPGM 0 + ; GFX9-LABEL: name: spill_vgpr_tuple + ; GFX9: liveins: $vgpr0_vgpr1, $sgpr76_sgpr77_sgpr78_sgpr79 + ; GFX9-NEXT: {{ $}} + ; GFX9-NEXT: S_WAITCNT 0 + ; GFX9-NEXT: $vgpr64_vgpr65 = V_MOV_B64_e32 $vgpr0_vgpr1, implicit $exec + ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr64, $sgpr76_sgpr77_sgpr78_sgpr79, 0, 672, 0, 0, implicit $exec, implicit-def $vgpr64_vgpr65, implicit $vgpr64_vgpr65 + ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr65, $sgpr76_sgpr77_sgpr78_sgpr79, 0, 676, 0, 0, implicit $exec, implicit $vgpr64_vgpr65 + ; GFX9-NEXT: S_ENDPGM 0 + ; + ; GFX12-LABEL: name: spill_vgpr_tuple + ; GFX12: liveins: $vgpr0_vgpr1, $sgpr76_sgpr77_sgpr78_sgpr79 + ; GFX12-NEXT: {{ $}} + ; GFX12-NEXT: S_WAIT_LOADCNT_DSCNT 0 + ; GFX12-NEXT: S_WAIT_EXPCNT 0 + ; GFX12-NEXT: S_WAIT_SAMPLECNT 0 + ; GFX12-NEXT: S_WAIT_BVHCNT 0 + ; GFX12-NEXT: S_WAIT_KMCNT 0 + ; GFX12-NEXT: $vgpr64_vgpr65 = V_MOV_B64_e32 $vgpr0_vgpr1, implicit $exec + ; GFX12-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr64, $sgpr76_sgpr77_sgpr78_sgpr79, 0, 672, 0, 0, implicit $exec, implicit-def $vgpr64_vgpr65, implicit $vgpr64_vgpr65 + ; GFX12-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr65, $sgpr76_sgpr77_sgpr78_sgpr79, 0, 676, 0, 0, implicit $exec, implicit $vgpr64_vgpr65 + ; GFX12-NEXT: S_ENDPGM 0 $vgpr64_vgpr65 = V_MOV_B64_e32 $vgpr0_vgpr1, implicit $exec BUFFER_STORE_DWORD_OFFSET killed $vgpr64, $sgpr76_sgpr77_sgpr78_sgpr79, 0, 672, 0, 0, implicit $exec, implicit-def $vgpr64_vgpr65, implicit $vgpr64_vgpr65 BUFFER_STORE_DWORD_OFFSET $vgpr65, $sgpr76_sgpr77_sgpr78_sgpr79, 0, 676, 0, 0, implicit $exec, implicit $vgpr64_vgpr65 @@ -33,14 +47,27 @@ body: | bb.0: liveins: $vgpr0, $sgpr10_sgpr11 - ; GCN-LABEL: name: load_vcc_wait - ; GCN: liveins: $vgpr0, $sgpr10_sgpr11 - ; GCN-NEXT: {{ $}} - ; GCN-NEXT: S_WAITCNT 0 - ; GCN-NEXT: $vcc_lo = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0 - ; GCN-NEXT: S_WAITCNT 49279 - ; GCN-NEXT: $vgpr1 = V_ADDC_U32_e32 0, $vgpr0, implicit-def $vcc, implicit $vcc, implicit $exec - ; GCN-NEXT: S_ENDPGM 0 + ; GFX9-LABEL: name: load_vcc_wait + ; GFX9: liveins: $vgpr0, $sgpr10_sgpr11 + ; GFX9-NEXT: {{ $}} + ; GFX9-NEXT: S_WAITCNT 0 + ; GFX9-NEXT: $vcc_lo = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0 + ; GFX9-NEXT: S_WAITCNT 49279 + ; GFX9-NEXT: $vgpr1 = V_ADDC_U32_e32 0, $vgpr0, implicit-def $vcc, implicit $vcc, implicit $exec + ; 
GFX9-NEXT: S_ENDPGM 0 + ; + ; GFX12-LABEL: name: load_vcc_wait + ; GFX12: liveins: $vgpr0, $sgpr10_sgpr11 + ; GFX12-NEXT: {{ $}} + ; GFX12-NEXT: S_WAIT_LOADCNT_DSCNT 0 + ; GFX12-NEXT: S_WAIT_EXPCNT 0 + ; GFX12-NEXT: S_WAIT_SAMPLECNT 0 + ; GFX12-NEXT: S_WAIT_BVHCNT 0 + ; GFX12-NEXT: S_WAIT_KMCNT 0 + ; GFX12-NEXT: $vcc_lo = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0 + ; GFX12-NEXT: S_WAIT_KMCNT 0 + ; GFX12-NEXT: $vgpr1 = V_ADDC_U32_e32 0, $vgpr0, implicit-def $vcc_lo, implicit $vcc_lo, implicit $exec + ; GFX12-NEXT: S_ENDPGM 0 $vcc_lo = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0 $vgpr1 = V_ADDC_U32_e32 0, $vgpr0, implicit-def $vcc, implicit $vcc, implicit $exec S_ENDPGM 0 @@ -55,14 +82,27 @@ body: | bb.0: liveins: $sgpr10_sgpr11, $vgpr0_vgpr1 - ; GCN-LABEL: name: load_flat_scr_lo_flat_load_wait - ; GCN: liveins: $sgpr10_sgpr11, $vgpr0_vgpr1 - ; GCN-NEXT: {{ $}} - ; GCN-NEXT: S_WAITCNT 0 - ; GCN-NEXT: $flat_scr_lo = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0 - ; GCN-NEXT: S_WAITCNT 49279 - ; GCN-NEXT: $vgpr2 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr - ; GCN-NEXT: S_ENDPGM 0 + ; GFX9-LABEL: name: load_flat_scr_lo_flat_load_wait + ; GFX9: liveins: $sgpr10_sgpr11, $vgpr0_vgpr1 + ; GFX9-NEXT: {{ $}} + ; GFX9-NEXT: S_WAITCNT 0 + ; GFX9-NEXT: $flat_scr_lo = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0 + ; GFX9-NEXT: S_WAITCNT 49279 + ; GFX9-NEXT: $vgpr2 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr + ; GFX9-NEXT: S_ENDPGM 0 + ; + ; GFX12-LABEL: name: load_flat_scr_lo_flat_load_wait + ; GFX12: liveins: $sgpr10_sgpr11, $vgpr0_vgpr1 + ; GFX12-NEXT: {{ $}} + ; GFX12-NEXT: S_WAIT_LOADCNT_DSCNT 0 + ; GFX12-NEXT: S_WAIT_EXPCNT 0 + ; GFX12-NEXT: S_WAIT_SAMPLECNT 0 + ; GFX12-NEXT: S_WAIT_BVHCNT 0 + ; GFX12-NEXT: S_WAIT_KMCNT 0 + ; GFX12-NEXT: $flat_scr_lo = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0 + ; GFX12-NEXT: S_WAIT_KMCNT 0 + ; GFX12-NEXT: $vgpr2 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr + ; GFX12-NEXT: S_ENDPGM 0 $flat_scr_lo = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0 $vgpr2 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr S_ENDPGM 0 @@ -75,15 +115,120 @@ body: | bb.0: liveins: $sgpr10_sgpr11, $vgpr0, $sgpr32 - ; GCN-LABEL: name: load_flat_scr_lo_scratch_store_wait - ; GCN: liveins: $sgpr10_sgpr11, $vgpr0, $sgpr32 - ; GCN-NEXT: {{ $}} - ; GCN-NEXT: S_WAITCNT 0 - ; GCN-NEXT: $flat_scr_hi = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0 - ; GCN-NEXT: S_WAITCNT 49279 - ; GCN-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr - ; GCN-NEXT: S_ENDPGM 0 + ; GFX9-LABEL: name: load_flat_scr_lo_scratch_store_wait + ; GFX9: liveins: $sgpr10_sgpr11, $vgpr0, $sgpr32 + ; GFX9-NEXT: {{ $}} + ; GFX9-NEXT: S_WAITCNT 0 + ; GFX9-NEXT: $flat_scr_hi = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0 + ; GFX9-NEXT: S_WAITCNT 49279 + ; GFX9-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr + ; GFX9-NEXT: S_ENDPGM 0 + ; + ; GFX12-LABEL: name: load_flat_scr_lo_scratch_store_wait + ; GFX12: liveins: $sgpr10_sgpr11, $vgpr0, $sgpr32 + ; GFX12-NEXT: {{ $}} + ; GFX12-NEXT: S_WAIT_LOADCNT_DSCNT 0 + ; GFX12-NEXT: S_WAIT_EXPCNT 0 + ; GFX12-NEXT: S_WAIT_SAMPLECNT 0 + ; GFX12-NEXT: S_WAIT_BVHCNT 0 + ; GFX12-NEXT: S_WAIT_KMCNT 0 + ; GFX12-NEXT: $flat_scr_hi = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0 + ; GFX12-NEXT: S_WAIT_KMCNT 0 + ; GFX12-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr + ; GFX12-NEXT: S_ENDPGM 0 $flat_scr_hi = S_LOAD_DWORD_IMM 
$sgpr10_sgpr11, 0, 0 SCRATCH_STORE_DWORD_SADDR $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr S_ENDPGM 0 ... + +# Check that implicit spill defs do not force wait to zero on the first store + +--- +name: spill_load_store + +body: | + bb.0: + liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32 + + ; GFX9-LABEL: name: spill_load_store + ; GFX9: liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32 + ; GFX9-NEXT: {{ $}} + ; GFX9-NEXT: S_WAITCNT 0 + ; GFX9-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX9-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec + ; GFX9-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec + ; GFX9-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX9-NEXT: S_WAITCNT 3955 + ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX9-NEXT: S_WAITCNT 3955 + ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec + ; GFX9-NEXT: S_WAITCNT 3955 + ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec + ; GFX9-NEXT: S_WAITCNT 3955 + ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX9-NEXT: S_ENDPGM 0 + ; + ; GFX12-LABEL: name: spill_load_store + ; GFX12: liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32 + ; GFX12-NEXT: {{ $}} + ; GFX12-NEXT: S_WAIT_LOADCNT_DSCNT 0 + ; GFX12-NEXT: S_WAIT_EXPCNT 0 + ; GFX12-NEXT: S_WAIT_SAMPLECNT 0 + ; GFX12-NEXT: S_WAIT_BVHCNT 0 + ; GFX12-NEXT: S_WAIT_KMCNT 0 + ; GFX12-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX12-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec + ; GFX12-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec + ; GFX12-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX12-NEXT: S_WAIT_LOADCNT 3 + ; GFX12-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX12-NEXT: S_WAIT_LOADCNT 2 + ; GFX12-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec + ; GFX12-NEXT: S_WAIT_LOADCNT 1 + ; GFX12-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec + ; GFX12-NEXT: S_WAIT_LOADCNT 0 + ; GFX12-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX12-NEXT: S_ENDPGM 0 + $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3 + $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec + $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec 
+ $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3 + BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3 + BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec + BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec + BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3 + S_ENDPGM 0 +... + +# Make sure we have wait to mitigate WAW on gfx12 + +--- +name: scratch_load_waw +body: | + bb.0.entry: + liveins: $vgpr0, $sgpr0 + + ; GFX9-LABEL: name: scratch_load_waw + ; GFX9: liveins: $vgpr0, $sgpr0 + ; GFX9-NEXT: {{ $}} + ; GFX9-NEXT: S_WAITCNT 0 + ; GFX9-NEXT: $vgpr2 = SCRATCH_LOAD_DWORD $vgpr0, 0, 0, implicit $exec, implicit $flat_scr + ; GFX9-NEXT: $vgpr2 = SCRATCH_LOAD_SHORT_D16_HI_SADDR $sgpr0, 0, 0, $vgpr2, implicit $exec, implicit $flat_scr + ; GFX9-NEXT: S_ENDPGM 0 + ; + ; GFX12-LABEL: name: scratch_load_waw + ; GFX12: liveins: $vgpr0, $sgpr0 + ; GFX12-NEXT: {{ $}} + ; GFX12-NEXT: S_WAIT_LOADCNT_DSCNT 0 + ; GFX12-NEXT: S_WAIT_EXPCNT 0 + ; GFX12-NEXT: S_WAIT_SAMPLECNT 0 + ; GFX12-NEXT: S_WAIT_BVHCNT 0 + ; GFX12-NEXT: S_WAIT_KMCNT 0 + ; GFX12-NEXT: $vgpr2 = SCRATCH_LOAD_DWORD $vgpr0, 0, 0, implicit $exec, implicit $flat_scr + ; GFX12-NEXT: S_WAIT_LOADCNT 0 + ; GFX12-NEXT: $vgpr2 = SCRATCH_LOAD_SHORT_D16_HI_SADDR $sgpr0, 0, 0, $vgpr2, implicit $exec, implicit $flat_scr + ; GFX12-NEXT: S_ENDPGM 0 + $vgpr2 = SCRATCH_LOAD_DWORD $vgpr0, 0, 0, implicit $exec, implicit $flat_scr + $vgpr2 = SCRATCH_LOAD_SHORT_D16_HI_SADDR $sgpr0, 0, 0, $vgpr2, implicit $exec, implicit $flat_scr + S_ENDPGM 0 +... 
diff --git a/llvm/test/CodeGen/AMDGPU/waitcnt-agpr.mir b/llvm/test/CodeGen/AMDGPU/waitcnt-agpr.mir index d69cb448b95de..7a807260d142d 100644 --- a/llvm/test/CodeGen/AMDGPU/waitcnt-agpr.mir +++ b/llvm/test/CodeGen/AMDGPU/waitcnt-agpr.mir @@ -321,8 +321,8 @@ body: | ; GCN-NEXT: BUNDLE implicit-def $agpr0, implicit $vgpr2_vgpr3 { ; GCN-NEXT: $agpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr ; GCN-NEXT: } - ; GCN-NEXT: S_WAITCNT 112 ; GCN-NEXT: BUNDLE implicit $agpr0, implicit $vgpr2_vgpr3 { + ; GCN-NEXT: S_WAITCNT 112 ; GCN-NEXT: FLAT_STORE_DWORD $vgpr2_vgpr3, $agpr0, 0, 0, implicit $exec, implicit $flat_scr ; GCN-NEXT: } BUNDLE implicit-def $agpr0, implicit $vgpr2_vgpr3 { diff --git a/llvm/test/CodeGen/AMDGPU/waitcnt.mir b/llvm/test/CodeGen/AMDGPU/waitcnt.mir index 4051be18dd49f..8528de77533bf 100644 --- a/llvm/test/CodeGen/AMDGPU/waitcnt.mir +++ b/llvm/test/CodeGen/AMDGPU/waitcnt.mir @@ -301,8 +301,8 @@ body: | # CHECK: BUNDLE implicit-def $vgpr0, implicit $vgpr1_vgpr2 { # CHECK-NEXT: $vgpr0 = FLAT_LOAD_DWORD $vgpr1_vgpr2, 0, 0, implicit $exec, implicit $flat_scr # CHECK-NEXT: } -# CHECK-NEXT: S_WAITCNT 112 # CHECK-NEXT: BUNDLE implicit $vgpr0, implicit $vgpr1_vgpr2 { +# CHECK-NEXT: S_WAITCNT 112 # CHECK-NEXT: FLAT_STORE_DWORD $vgpr1_vgpr2, $vgpr0, 0, 0, implicit $exec, implicit $flat_scr # CHECK-NEXT: } From cda0cb3939308ab8ac81e1898228a3b7744a2801 Mon Sep 17 00:00:00 2001 From: David Sherwood Date: Wed, 25 Sep 2024 08:56:37 +0100 Subject: [PATCH 115/212] [NFC][LoopVectorize] Remove unused argument from fixupIVUsers (#109789) The VectorHeader argument passed to fixupIVUsers is unused and can be removed. --- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 09e4d0fcd31f3..5e4f33c55610f 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -541,8 +541,8 @@ class InnerLoopVectorizer { /// Set up the values of the IVs correctly when exiting the vector loop. void fixupIVUsers(PHINode *OrigPhi, const InductionDescriptor &II, Value *VectorTripCount, Value *EndValue, - BasicBlock *MiddleBlock, BasicBlock *VectorHeader, - VPlan &Plan, VPTransformState &State); + BasicBlock *MiddleBlock, VPlan &Plan, + VPTransformState &State); /// Iteratively sink the scalarized operands of a predicated instruction into /// the block that was created for it. @@ -2742,8 +2742,7 @@ InnerLoopVectorizer::createVectorizedLoopSkeleton( void InnerLoopVectorizer::fixupIVUsers(PHINode *OrigPhi, const InductionDescriptor &II, Value *VectorTripCount, Value *EndValue, - BasicBlock *MiddleBlock, - BasicBlock *VectorHeader, VPlan &Plan, + BasicBlock *MiddleBlock, VPlan &Plan, VPTransformState &State) { // There are two kinds of external IV usages - those that use the value // computed in the last iteration (the PHI) and those that use the penultimate @@ -2954,8 +2953,7 @@ void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State, for (const auto &Entry : Legal->getInductionVars()) fixupIVUsers(Entry.first, Entry.second, getOrCreateVectorTripCount(VectorLoop->getLoopPreheader()), - IVEndValues[Entry.first], LoopMiddleBlock, - VectorLoop->getHeader(), Plan, State); + IVEndValues[Entry.first], LoopMiddleBlock, Plan, State); } // Fix live-out phis not already fixed earlier. 
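A minimal caller-side sketch of the fixupIVUsers change above (illustration only; the names mirror the call site in the final hunk of LoopVectorize.cpp):

    // Old call shape: the vector loop header was threaded through unused.
    fixupIVUsers(Entry.first, Entry.second,
                 getOrCreateVectorTripCount(VectorLoop->getLoopPreheader()),
                 IVEndValues[Entry.first], LoopMiddleBlock,
                 VectorLoop->getHeader(), Plan, State);

    // New call shape: identical behavior, with the dead argument removed.
    fixupIVUsers(Entry.first, Entry.second,
                 getOrCreateVectorTripCount(VectorLoop->getLoopPreheader()),
                 IVEndValues[Entry.first], LoopMiddleBlock, Plan, State);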
From b8d1bae6484625332ab7596390b855e6dacfead7 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Wed, 25 Sep 2024 10:14:15 +0200 Subject: [PATCH 116/212] [CmpInstAnalysis] Return decomposed bit test as struct (NFC) (#109819) decomposeBitTestICmp() currently returns the result via two out parameters plus an in-place modification of Pred. This changes it to return an optional struct instead. The motivation here is twofold. First, I'd like to extend this code to handle cases where the comparison is against a value other than zero, which would mean yet another out parameter. Second, while doing that I was badly bitten by the in-place modification, so I'd like to get rid of it. --- llvm/include/llvm/Analysis/CmpInstAnalysis.h | 19 ++++-- llvm/lib/Analysis/CmpInstAnalysis.cpp | 67 ++++++++++--------- llvm/lib/Analysis/InstructionSimplify.cpp | 10 ++- .../InstCombine/InstCombineAndOrXor.cpp | 20 ++++-- .../InstCombine/InstCombineCompares.cpp | 9 ++- .../InstCombine/InstCombineSelect.cpp | 20 +++--- .../Transforms/Scalar/LoopIdiomRecognize.cpp | 15 +++-- 7 files changed, 91 insertions(+), 69 deletions(-) diff --git a/llvm/include/llvm/Analysis/CmpInstAnalysis.h b/llvm/include/llvm/Analysis/CmpInstAnalysis.h index 1d07a0c22887b..406dacd930605 100644 --- a/llvm/include/llvm/Analysis/CmpInstAnalysis.h +++ b/llvm/include/llvm/Analysis/CmpInstAnalysis.h @@ -14,6 +14,7 @@ #ifndef LLVM_ANALYSIS_CMPINSTANALYSIS_H #define LLVM_ANALYSIS_CMPINSTANALYSIS_H +#include "llvm/ADT/APInt.h" #include "llvm/IR/InstrTypes.h" namespace llvm { @@ -91,12 +92,18 @@ namespace llvm { Constant *getPredForFCmpCode(unsigned Code, Type *OpTy, CmpInst::Predicate &Pred); - /// Decompose an icmp into the form ((X & Mask) pred 0) if possible. The - /// returned predicate is either == or !=. Returns false if decomposition - /// fails. - bool decomposeBitTestICmp(Value *LHS, Value *RHS, CmpInst::Predicate &Pred, - Value *&X, APInt &Mask, - bool LookThroughTrunc = true); + /// Represents the operation icmp (X & Mask) pred 0, where pred can only be + /// eq or ne. + struct DecomposedBitTest { + Value *X; + CmpInst::Predicate Pred; + APInt Mask; + }; + + /// Decompose an icmp into the form ((X & Mask) pred 0) if possible. + std::optional + decomposeBitTestICmp(Value *LHS, Value *RHS, CmpInst::Predicate Pred, + bool LookThroughTrunc = true); } // end namespace llvm diff --git a/llvm/lib/Analysis/CmpInstAnalysis.cpp b/llvm/lib/Analysis/CmpInstAnalysis.cpp index a1fa7857764d9..36d7aa510545a 100644 --- a/llvm/lib/Analysis/CmpInstAnalysis.cpp +++ b/llvm/lib/Analysis/CmpInstAnalysis.cpp @@ -73,81 +73,84 @@ Constant *llvm::getPredForFCmpCode(unsigned Code, Type *OpTy, return nullptr; } -bool llvm::decomposeBitTestICmp(Value *LHS, Value *RHS, - CmpInst::Predicate &Pred, - Value *&X, APInt &Mask, bool LookThruTrunc) { +std::optional +llvm::decomposeBitTestICmp(Value *LHS, Value *RHS, CmpInst::Predicate Pred, + bool LookThruTrunc) { using namespace PatternMatch; const APInt *C; if (!match(RHS, m_APIntAllowPoison(C))) - return false; + return std::nullopt; + DecomposedBitTest Result; switch (Pred) { default: - return false; + return std::nullopt; case ICmpInst::ICMP_SLT: // X < 0 is equivalent to (X & SignMask) != 0. if (!C->isZero()) - return false; - Mask = APInt::getSignMask(C->getBitWidth()); - Pred = ICmpInst::ICMP_NE; + return std::nullopt; + Result.Mask = APInt::getSignMask(C->getBitWidth()); + Result.Pred = ICmpInst::ICMP_NE; break; case ICmpInst::ICMP_SLE: // X <= -1 is equivalent to (X & SignMask) != 0. 
if (!C->isAllOnes()) - return false; - Mask = APInt::getSignMask(C->getBitWidth()); - Pred = ICmpInst::ICMP_NE; + return std::nullopt; + Result.Mask = APInt::getSignMask(C->getBitWidth()); + Result.Pred = ICmpInst::ICMP_NE; break; case ICmpInst::ICMP_SGT: // X > -1 is equivalent to (X & SignMask) == 0. if (!C->isAllOnes()) - return false; - Mask = APInt::getSignMask(C->getBitWidth()); - Pred = ICmpInst::ICMP_EQ; + return std::nullopt; + Result.Mask = APInt::getSignMask(C->getBitWidth()); + Result.Pred = ICmpInst::ICMP_EQ; break; case ICmpInst::ICMP_SGE: // X >= 0 is equivalent to (X & SignMask) == 0. if (!C->isZero()) - return false; - Mask = APInt::getSignMask(C->getBitWidth()); - Pred = ICmpInst::ICMP_EQ; + return std::nullopt; + Result.Mask = APInt::getSignMask(C->getBitWidth()); + Result.Pred = ICmpInst::ICMP_EQ; break; case ICmpInst::ICMP_ULT: // X isPowerOf2()) - return false; - Mask = -*C; - Pred = ICmpInst::ICMP_EQ; + return std::nullopt; + Result.Mask = -*C; + Result.Pred = ICmpInst::ICMP_EQ; break; case ICmpInst::ICMP_ULE: // X <=u 2^n-1 is equivalent to (X & ~(2^n-1)) == 0. if (!(*C + 1).isPowerOf2()) - return false; - Mask = ~*C; - Pred = ICmpInst::ICMP_EQ; + return std::nullopt; + Result.Mask = ~*C; + Result.Pred = ICmpInst::ICMP_EQ; break; case ICmpInst::ICMP_UGT: // X >u 2^n-1 is equivalent to (X & ~(2^n-1)) != 0. if (!(*C + 1).isPowerOf2()) - return false; - Mask = ~*C; - Pred = ICmpInst::ICMP_NE; + return std::nullopt; + Result.Mask = ~*C; + Result.Pred = ICmpInst::ICMP_NE; break; case ICmpInst::ICMP_UGE: // X >=u 2^n is equivalent to (X & ~(2^n-1)) != 0. if (!C->isPowerOf2()) - return false; - Mask = -*C; - Pred = ICmpInst::ICMP_NE; + return std::nullopt; + Result.Mask = -*C; + Result.Pred = ICmpInst::ICMP_NE; break; } + Value *X; if (LookThruTrunc && match(LHS, m_Trunc(m_Value(X)))) { - Mask = Mask.zext(X->getType()->getScalarSizeInBits()); + Result.X = X; + Result.Mask = Result.Mask.zext(X->getType()->getScalarSizeInBits()); } else { - X = LHS; + Result.X = LHS; } - return true; + return Result; } diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp index 32a9f1ab34fb3..90f05d43a2b14 100644 --- a/llvm/lib/Analysis/InstructionSimplify.cpp +++ b/llvm/lib/Analysis/InstructionSimplify.cpp @@ -4624,13 +4624,11 @@ static Value *simplifyCmpSelOfMaxMin(Value *CmpLHS, Value *CmpRHS, static Value *simplifySelectWithFakeICmpEq(Value *CmpLHS, Value *CmpRHS, ICmpInst::Predicate Pred, Value *TrueVal, Value *FalseVal) { - Value *X; - APInt Mask; - if (!decomposeBitTestICmp(CmpLHS, CmpRHS, Pred, X, Mask)) - return nullptr; + if (auto Res = decomposeBitTestICmp(CmpLHS, CmpRHS, Pred)) + return simplifySelectBitTest(TrueVal, FalseVal, Res->X, &Res->Mask, + Res->Pred == ICmpInst::ICMP_EQ); - return simplifySelectBitTest(TrueVal, FalseVal, X, &Mask, - Pred == ICmpInst::ICMP_EQ); + return nullptr; } /// Try to simplify a select instruction when its condition operand is an diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp index 80d3adedfc89f..e8c0b00661654 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp @@ -181,11 +181,13 @@ static unsigned conjugateICmpMask(unsigned Mask) { // Adapts the external decomposeBitTestICmp for local use. 
static bool decomposeBitTestICmp(Value *LHS, Value *RHS, CmpInst::Predicate &Pred, Value *&X, Value *&Y, Value *&Z) { - APInt Mask; - if (!llvm::decomposeBitTestICmp(LHS, RHS, Pred, X, Mask)) + auto Res = llvm::decomposeBitTestICmp(LHS, RHS, Pred); + if (!Res) return false; - Y = ConstantInt::get(X->getType(), Mask); + Pred = Res->Pred; + X = Res->X; + Y = ConstantInt::get(X->getType(), Res->Mask); Z = ConstantInt::get(X->getType(), 0); return true; } @@ -870,11 +872,15 @@ static Value *foldSignedTruncationCheck(ICmpInst *ICmp0, ICmpInst *ICmp1, APInt &UnsetBitsMask) -> bool { CmpInst::Predicate Pred = ICmp->getPredicate(); // Can it be decomposed into icmp eq (X & Mask), 0 ? - if (llvm::decomposeBitTestICmp(ICmp->getOperand(0), ICmp->getOperand(1), - Pred, X, UnsetBitsMask, - /*LookThroughTrunc=*/false) && - Pred == ICmpInst::ICMP_EQ) + auto Res = + llvm::decomposeBitTestICmp(ICmp->getOperand(0), ICmp->getOperand(1), + Pred, /*LookThroughTrunc=*/false); + if (Res && Res->Pred == ICmpInst::ICMP_EQ) { + X = Res->X; + UnsetBitsMask = Res->Mask; return true; + } + // Is it icmp eq (X & Mask), 0 already? const APInt *Mask; if (match(ICmp, m_ICmp(Pred, m_And(m_Value(X), m_APInt(Mask)), m_Zero())) && diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp index 698abbb34c18c..b1215bb4d83b0 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -5905,11 +5905,10 @@ Instruction *InstCombinerImpl::foldICmpWithTrunc(ICmpInst &ICmp) { // This matches patterns corresponding to tests of the signbit as well as: // (trunc X) u< C --> (X & -C) == 0 (are all masked-high-bits clear?) // (trunc X) u> C --> (X & ~C) != 0 (are any masked-high-bits set?) 
- APInt Mask; - if (decomposeBitTestICmp(Op0, Op1, Pred, X, Mask, true /* WithTrunc */)) { - Value *And = Builder.CreateAnd(X, Mask); - Constant *Zero = ConstantInt::getNullValue(X->getType()); - return new ICmpInst(Pred, And, Zero); + if (auto Res = decomposeBitTestICmp(Op0, Op1, Pred, /*WithTrunc=*/true)) { + Value *And = Builder.CreateAnd(Res->X, Res->Mask); + Constant *Zero = ConstantInt::getNullValue(Res->X->getType()); + return new ICmpInst(Res->Pred, And, Zero); } unsigned SrcBits = X->getType()->getScalarSizeInBits(); diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp index 7476db9ee38f4..3dbe95897d635 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp @@ -145,12 +145,15 @@ static Value *foldSelectICmpAnd(SelectInst &Sel, ICmpInst *Cmp, return nullptr; AndMask = *AndRHS; - } else if (decomposeBitTestICmp(Cmp->getOperand(0), Cmp->getOperand(1), - Pred, V, AndMask)) { - assert(ICmpInst::isEquality(Pred) && "Not equality test?"); - if (!AndMask.isPowerOf2()) + } else if (auto Res = decomposeBitTestICmp(Cmp->getOperand(0), + Cmp->getOperand(1), Pred)) { + assert(ICmpInst::isEquality(Res->Pred) && "Not equality test?"); + if (!Res->Mask.isPowerOf2()) return nullptr; + V = Res->X; + AndMask = Res->Mask; + Pred = Res->Pred; CreateAnd = true; } else { return nullptr; @@ -747,12 +750,13 @@ static Value *foldSelectICmpAndBinOp(const ICmpInst *IC, Value *TrueVal, C1Log = C1->logBase2(); } else { - APInt C1; - if (!decomposeBitTestICmp(CmpLHS, CmpRHS, Pred, CmpLHS, C1) || - !C1.isPowerOf2()) + auto Res = decomposeBitTestICmp(CmpLHS, CmpRHS, Pred); + if (!Res || !Res->Mask.isPowerOf2()) return nullptr; - C1Log = C1.logBase2(); + CmpLHS = Res->X; + Pred = Res->Pred; + C1Log = Res->Mask.logBase2(); NeedAnd = true; } diff --git a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp index 578d087e470e1..740e1e39b9ee7 100644 --- a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp +++ b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp @@ -2464,11 +2464,16 @@ static bool detectShiftUntilBitTestIdiom(Loop *CurLoop, Value *&BaseX, (BitPos = ConstantExpr::getExactLogBase2(cast(BitMask))); }; auto MatchDecomposableConstantBitMask = [&]() { - APInt Mask; - return llvm::decomposeBitTestICmp(CmpLHS, CmpRHS, Pred, CurrX, Mask) && - ICmpInst::isEquality(Pred) && Mask.isPowerOf2() && - (BitMask = ConstantInt::get(CurrX->getType(), Mask)) && - (BitPos = ConstantInt::get(CurrX->getType(), Mask.logBase2())); + auto Res = llvm::decomposeBitTestICmp(CmpLHS, CmpRHS, Pred); + if (Res && Res->Mask.isPowerOf2()) { + assert(ICmpInst::isEquality(Res->Pred)); + Pred = Res->Pred; + CurrX = Res->X; + BitMask = ConstantInt::get(CurrX->getType(), Res->Mask); + BitPos = ConstantInt::get(CurrX->getType(), Res->Mask.logBase2()); + return true; + } + return false; }; if (!MatchVariableBitMask() && !MatchConstantBitMask() && From c58e51ac9786a335f7ebfe8978605e2379cc2c2a Mon Sep 17 00:00:00 2001 From: Yingwei Zheng Date: Wed, 25 Sep 2024 16:21:38 +0800 Subject: [PATCH 117/212] [ConstantFPRange] Suppress unused result warnings. NFC. (#109921) Fixes warnings `error: ignoring return value of function declared with 'nodiscard' attribute [-Werror,-Wunused-result]`. 
--- llvm/unittests/IR/ConstantFPRangeTest.cpp | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/llvm/unittests/IR/ConstantFPRangeTest.cpp b/llvm/unittests/IR/ConstantFPRangeTest.cpp index dbb6c4545a183..bf6ea95c00e22 100644 --- a/llvm/unittests/IR/ConstantFPRangeTest.cpp +++ b/llvm/unittests/IR/ConstantFPRangeTest.cpp @@ -414,18 +414,21 @@ TEST_F(ConstantFPRangeTest, Print) { #ifdef GTEST_HAS_DEATH_TEST #ifndef NDEBUG TEST_F(ConstantFPRangeTest, NonCanonicalEmptySet) { - EXPECT_DEATH(ConstantFPRange::getNonNaN(APFloat(1.0), APFloat(0.0)), + EXPECT_DEATH((void)(ConstantFPRange::getNonNaN(APFloat(1.0), APFloat(0.0))), "Non-canonical form"); } TEST_F(ConstantFPRangeTest, MismatchedSemantics) { - EXPECT_DEATH(ConstantFPRange::getNonNaN(APFloat(0.0), APFloat(1.0f)), + EXPECT_DEATH((void)(ConstantFPRange::getNonNaN(APFloat(0.0), APFloat(1.0f))), "Should only use the same semantics"); - EXPECT_DEATH(One.contains(APFloat(1.0f)), + EXPECT_DEATH((void)(One.contains(APFloat(1.0f))), "Should only use the same semantics"); ConstantFPRange OneF32 = ConstantFPRange(APFloat(1.0f)); - EXPECT_DEATH(One.contains(OneF32), "Should only use the same semantics"); - EXPECT_DEATH(One.intersectWith(OneF32), "Should only use the same semantics"); - EXPECT_DEATH(One.unionWith(OneF32), "Should only use the same semantics"); + EXPECT_DEATH((void)(One.contains(OneF32)), + "Should only use the same semantics"); + EXPECT_DEATH((void)(One.intersectWith(OneF32)), + "Should only use the same semantics"); + EXPECT_DEATH((void)(One.unionWith(OneF32)), + "Should only use the same semantics"); } #endif #endif From 99cd4cb1231a7209725722e477c2d829a18fb252 Mon Sep 17 00:00:00 2001 From: "Miguel A. Arroyo" Date: Wed, 25 Sep 2024 01:29:40 -0700 Subject: [PATCH 118/212] [LLD][MINGW] Add `--undefined-glob` flag support (#109866) --- lld/MinGW/Driver.cpp | 2 ++ lld/MinGW/Options.td | 2 ++ lld/docs/ReleaseNotes.rst | 1 + lld/test/MinGW/driver.test | 3 +++ 4 files changed, 8 insertions(+) diff --git a/lld/MinGW/Driver.cpp b/lld/MinGW/Driver.cpp index c7d7b9cfca386..553698d4f537f 100644 --- a/lld/MinGW/Driver.cpp +++ b/lld/MinGW/Driver.cpp @@ -514,6 +514,8 @@ bool link(ArrayRef argsArr, llvm::raw_ostream &stdoutOS, for (auto *a : args.filtered(OPT_require_defined)) add("-include:" + StringRef(a->getValue())); + for (auto *a : args.filtered(OPT_undefined_glob)) + add("-includeglob:" + StringRef(a->getValue())); for (auto *a : args.filtered(OPT_undefined)) add("-includeoptional:" + StringRef(a->getValue())); for (auto *a : args.filtered(OPT_delayload)) diff --git a/lld/MinGW/Options.td b/lld/MinGW/Options.td index 7bd5fb80749da..ff7e21fa808f3 100644 --- a/lld/MinGW/Options.td +++ b/lld/MinGW/Options.td @@ -139,6 +139,8 @@ defm threads defm tsaware: B_disable<"tsaware", "Set the 'Terminal Server aware' flag", "Don't set the 'Terminal Server aware' flag">; defm undefined: Eq<"undefined", "Include symbol in the link, if available">; +defm undefined_glob: EEq<"undefined-glob", "Force undefined symbol during linking">, + MetaVarName<"">; defm whole_archive: B<"whole-archive", "Include all object files for following archives", "No longer include all object files for following archives">; diff --git a/lld/docs/ReleaseNotes.rst b/lld/docs/ReleaseNotes.rst index 6e043773f0037..da93da9196af7 100644 --- a/lld/docs/ReleaseNotes.rst +++ b/lld/docs/ReleaseNotes.rst @@ -45,6 +45,7 @@ COFF Improvements MinGW Improvements ------------------ +* ``--undefined-glob`` is now supported by translating into the ``/includeglob`` 
flag. MachO Improvements ------------------ diff --git a/lld/test/MinGW/driver.test b/lld/test/MinGW/driver.test index 0dab66b613c77..2831d155fef12 100644 --- a/lld/test/MinGW/driver.test +++ b/lld/test/MinGW/driver.test @@ -249,6 +249,9 @@ REQUIRE-DEFINED: -include:_foo -include:_bar -include:_baz -include:_foo2 RUN: ld.lld -### foo.o -m i386pe -u _foo --undefined _bar -undefined=_baz --undefined=_foo2 -u_foo3 2>&1 | FileCheck -check-prefix=UNDEFINED %s UNDEFINED: -includeoptional:_foo -includeoptional:_bar -includeoptional:_baz -includeoptional:_foo2 -includeoptional:_foo3 +RUN: ld.lld -### foo.o -m i386pe --undefined-glob="_foo*" 2>&1 | FileCheck -check-prefix=UNDEFINED-GLOB %s +UNDEFINED-GLOB: -includeglob:_foo* + RUN: ld.lld -### -m i386pep foo.o -Llibpath 2>&1 | FileCheck -check-prefix LIBPATH %s LIBPATH: -libpath:libpath From ba3d174d3f9ec4fb98a837e4e5089094405e1d33 Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Wed, 25 Sep 2024 01:30:03 -0700 Subject: [PATCH 119/212] [FuzzMutate] Avoid repeated hash lookups (NFC) (#109903) --- llvm/lib/FuzzMutate/IRMutator.cpp | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/llvm/lib/FuzzMutate/IRMutator.cpp b/llvm/lib/FuzzMutate/IRMutator.cpp index 72e0de5937607..e1fe6c8d89ab0 100644 --- a/llvm/lib/FuzzMutate/IRMutator.cpp +++ b/llvm/lib/FuzzMutate/IRMutator.cpp @@ -623,9 +623,11 @@ void ShuffleBlockStrategy::mutate(BasicBlock &BB, RandomIRBuilder &IB) { auto getAliveChildren = [&AliveInstsLookup](Instruction *I) { SmallSetVector Children; for (Value *U : I->users()) { - Instruction *P = dyn_cast(U); - if (P && AliveInstsLookup.count(P)) - Children.insert(AliveInstsLookup[P]); + if (Instruction *P = dyn_cast(U)) { + auto It = AliveInstsLookup.find(P); + if (It != AliveInstsLookup.end()) + Children.insert(It->second); + } } return Children; }; From ec60030639000daa6d64a92e088d74e65fcfc8a1 Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Wed, 25 Sep 2024 01:30:40 -0700 Subject: [PATCH 120/212] [DWARFLinker] Avoid repeated hash lookups (NFC) (#109904) --- llvm/include/llvm/DWARFLinker/IndexedValuesMap.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/llvm/include/llvm/DWARFLinker/IndexedValuesMap.h b/llvm/include/llvm/DWARFLinker/IndexedValuesMap.h index fadbeb168b533..5e0779157473e 100644 --- a/llvm/include/llvm/DWARFLinker/IndexedValuesMap.h +++ b/llvm/include/llvm/DWARFLinker/IndexedValuesMap.h @@ -21,11 +21,9 @@ namespace dwarf_linker { template class IndexedValuesMap { public: uint64_t getValueIndex(T Value) { - typename ValueToIndexMapTy::iterator It = ValueToIndexMap.find(Value); - if (It == ValueToIndexMap.end()) { - It = ValueToIndexMap.insert(std::make_pair(Value, Values.size())).first; + auto [It, Inserted] = ValueToIndexMap.try_emplace(Value, Values.size()); + if (Inserted) Values.push_back(Value); - } return It->second; } From c3fc763dc165df36e655bf687b96688c2e7184ec Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Wed, 25 Sep 2024 01:31:33 -0700 Subject: [PATCH 121/212] [MCA] Avoid repeated hash lookups (NFC) (#109905) --- llvm/lib/MCA/HardwareUnits/ResourceManager.cpp | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/llvm/lib/MCA/HardwareUnits/ResourceManager.cpp b/llvm/lib/MCA/HardwareUnits/ResourceManager.cpp index 82030207eee61..e45bd00f1a292 100644 --- a/llvm/lib/MCA/HardwareUnits/ResourceManager.cpp +++ b/llvm/lib/MCA/HardwareUnits/ResourceManager.cpp @@ -322,12 +322,11 @@ uint64_t ResourceManager::checkAvailability(const InstrDesc &Desc) const { uint64_t 
ResourceMask = llvm::bit_floor(ReadyMask); - auto it = AvailableUnits.find(ResourceMask); - if (it == AvailableUnits.end()) { + auto [it, Inserted] = AvailableUnits.try_emplace(ResourceMask); + if (Inserted) { unsigned Index = getResourceStateIndex(ResourceMask); unsigned NumUnits = llvm::popcount(Resources[Index]->getReadyMask()); - it = - AvailableUnits.insert(std::make_pair(ResourceMask, NumUnits)).first; + it->second = NumUnits; } if (!it->second) { From a36aca5e189e8fd50af32060a25167e52e807f90 Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Wed, 25 Sep 2024 01:32:31 -0700 Subject: [PATCH 122/212] [sancov] Avoid repeated map lookups (NFC) (#109906) --- llvm/tools/sancov/sancov.cpp | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/llvm/tools/sancov/sancov.cpp b/llvm/tools/sancov/sancov.cpp index dd51226e21311..80f9996ba705b 100644 --- a/llvm/tools/sancov/sancov.cpp +++ b/llvm/tools/sancov/sancov.cpp @@ -323,11 +323,10 @@ static void operator<<(json::OStream &W, for (const auto &Loc : Point->Locs) { if (Loc.FileName != FileName || Loc.FunctionName != FunctionName) continue; - if (WrittenIds.find(Point->Id) != WrittenIds.end()) + if (!WrittenIds.insert(Point->Id).second) continue; // Output : ":". - WrittenIds.insert(Point->Id); W.attribute(Point->Id, (utostr(Loc.Line) + ":" + utostr(Loc.Column))); } @@ -418,9 +417,6 @@ SymbolizedCoverage::read(const std::string &InputFile) { auto LineStr = Loc.substr(0, ColonPos); auto ColStr = Loc.substr(ColonPos + 1, Loc.size()); - if (Points.find(PointId) == Points.end()) - Points.insert(std::make_pair(PointId, CoveragePoint(PointId))); - DILineInfo LineInfo; LineInfo.FileName = Filename; LineInfo.FunctionName = FunctionName; @@ -428,7 +424,8 @@ SymbolizedCoverage::read(const std::string &InputFile) { LineInfo.Line = std::strtoul(LineStr.c_str(), &End, 10); LineInfo.Column = std::strtoul(ColStr.c_str(), &End, 10); - CoveragePoint *CoveragePoint = &Points.find(PointId)->second; + CoveragePoint *CoveragePoint = + &Points.try_emplace(PointId, PointId).first->second; CoveragePoint->Locs.push_back(LineInfo); } } @@ -576,10 +573,8 @@ getCoveragePoints(const std::string &ObjectFile, FrameInfo.FileName = normalizeFilename(FrameInfo.FileName); if (Ig.isIgnorelisted(FrameInfo)) continue; - if (Infos.find(FrameInfo) == Infos.end()) { - Infos.insert(FrameInfo); + if (Infos.insert(FrameInfo).second) Point.Locs.push_back(FrameInfo); - } } Result.push_back(Point); From 850ee790cfcafa39c4f00441e049e2862a597ad8 Mon Sep 17 00:00:00 2001 From: Dmitry Chernenkov Date: Wed, 25 Sep 2024 08:32:44 +0000 Subject: [PATCH 123/212] [Bazel] Port fa824dc0dd960214865b03d8f56b18bb93e4a88b --- utils/bazel/llvm-project-overlay/llvm/BUILD.bazel | 3 +++ 1 file changed, 3 insertions(+) diff --git a/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel b/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel index bbb0435837e41..62f1c2a50acf7 100644 --- a/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel @@ -956,6 +956,9 @@ cc_library( ) + [ "include/llvm-c/Comdat.h", "include/llvm-c/DebugInfo.h", + "include/llvm/Analysis/ValueTracking.h", + "include/llvm/Analysis/SimplifyQuery.h", + "include/llvm/Analysis/WithCache.h", ] + [":llvm_intrinsics_headers"], copts = llvm_copts, textual_hdrs = glob(["include/llvm/IR/*.def"]), From d853adee004dcee7d5a4984539c07f08394cf2b4 Mon Sep 17 00:00:00 2001 From: Dominik Montada Date: Wed, 25 Sep 2024 10:47:14 +0200 Subject: [PATCH 124/212] [MIR] Fix return value when computed 
properties conflict with given prop (#109923) This fixes a test failure when expensive checks are enabled. Use the correct return value when computing machine function properties resulted in an error (e.g. when conflicting with explicitly set values). Without this, the machine verifier would crash even in the presence of parsing errors which should have gently terminated execution. --- llvm/lib/CodeGen/MIRParser/MIRParser.cpp | 4 +-- .../machine-function-isssa-conflict.mir | 14 ++++++++ .../machine-function-nophis-conflict.mir | 18 ++++++++++ .../machine-function-novregs-conflict.mir | 13 +++++++ ...ptionally-computed-properties-conflict.mir | 35 ------------------- 5 files changed, 47 insertions(+), 37 deletions(-) create mode 100644 llvm/test/CodeGen/MIR/Generic/machine-function-isssa-conflict.mir create mode 100644 llvm/test/CodeGen/MIR/Generic/machine-function-nophis-conflict.mir create mode 100644 llvm/test/CodeGen/MIR/Generic/machine-function-novregs-conflict.mir delete mode 100644 llvm/test/CodeGen/MIR/Generic/machine-function-optionally-computed-properties-conflict.mir diff --git a/llvm/lib/CodeGen/MIRParser/MIRParser.cpp b/llvm/lib/CodeGen/MIRParser/MIRParser.cpp index 3a00b8ec4771d..be07fbf478b1d 100644 --- a/llvm/lib/CodeGen/MIRParser/MIRParser.cpp +++ b/llvm/lib/CodeGen/MIRParser/MIRParser.cpp @@ -625,10 +625,10 @@ MIRParserImpl::initializeMachineFunction(const yaml::MachineFunction &YamlMF, MRI.freezeReservedRegs(); if (computeFunctionProperties(MF, YamlMF)) - return false; + return true; if (initializeCallSiteInfo(PFS, YamlMF)) - return false; + return true; setupDebugValueTracking(MF, PFS, YamlMF); diff --git a/llvm/test/CodeGen/MIR/Generic/machine-function-isssa-conflict.mir b/llvm/test/CodeGen/MIR/Generic/machine-function-isssa-conflict.mir new file mode 100644 index 0000000000000..362d54db7033f --- /dev/null +++ b/llvm/test/CodeGen/MIR/Generic/machine-function-isssa-conflict.mir @@ -0,0 +1,14 @@ +# RUN: not llc -run-pass none -o /dev/null %s 2>&1 | FileCheck %s + +# Test that computed properties are not conflicting with explicitly set +# properties + +--- +# CHECK: error: {{.*}}: TestIsSSAOverrideConflict has explicit property IsSSA, but is not valid SSA +name: TestIsSSAOverrideConflict +isSSA: true +body: | + bb.0: + %0:_(s32) = G_IMPLICIT_DEF + %0:_(s32) = G_IMPLICIT_DEF +... diff --git a/llvm/test/CodeGen/MIR/Generic/machine-function-nophis-conflict.mir b/llvm/test/CodeGen/MIR/Generic/machine-function-nophis-conflict.mir new file mode 100644 index 0000000000000..c113ea59a9049 --- /dev/null +++ b/llvm/test/CodeGen/MIR/Generic/machine-function-nophis-conflict.mir @@ -0,0 +1,18 @@ +# RUN: not llc -run-pass none -o /dev/null %s 2>&1 | FileCheck %s + +# Test that computed properties are not conflicting with explicitly set +# properties + +--- +# CHECK: error: {{.*}}: TestNoPhisOverrideConflict has explicit property NoPhi, but contains at least one PHI +name: TestNoPhisOverrideConflict +noPhis: true +tracksRegLiveness: true +body: | + bb.0: + %0:_(s32) = G_IMPLICIT_DEF + + bb.1: + %1:_(s32) = PHI %0, %bb.0, %1, %bb.1 + G_BR %bb.1 +... 
diff --git a/llvm/test/CodeGen/MIR/Generic/machine-function-novregs-conflict.mir b/llvm/test/CodeGen/MIR/Generic/machine-function-novregs-conflict.mir new file mode 100644 index 0000000000000..5f394a4bbbdb6 --- /dev/null +++ b/llvm/test/CodeGen/MIR/Generic/machine-function-novregs-conflict.mir @@ -0,0 +1,13 @@ +# RUN: not llc -run-pass none -o /dev/null %s 2>&1 | FileCheck %s + +# Test that computed properties are not conflicting with explicitly set +# properties + +--- +# CHECK: error: {{.*}}: TestNoVRegsOverrideConflict has explicit property NoVRegs, but contains virtual registers +name: TestNoVRegsOverrideConflict +noVRegs: true +body: | + bb.0: + %0:_(s32) = G_IMPLICIT_DEF +... diff --git a/llvm/test/CodeGen/MIR/Generic/machine-function-optionally-computed-properties-conflict.mir b/llvm/test/CodeGen/MIR/Generic/machine-function-optionally-computed-properties-conflict.mir deleted file mode 100644 index d8d178d90ae0a..0000000000000 --- a/llvm/test/CodeGen/MIR/Generic/machine-function-optionally-computed-properties-conflict.mir +++ /dev/null @@ -1,35 +0,0 @@ -# RUN: not llc -run-pass none -o /dev/null %s 2>&1 | FileCheck %s - -# Test that computed properties are not conflicting with explicitly set -# properties - ---- -# CHECK: error: {{.*}}: TestNoPhisOverrideConflict has explicit property NoPhi, but contains at least one PHI -name: TestNoPhisOverrideConflict -noPhis: true -tracksRegLiveness: true -body: | - bb.0: - %0:_(s32) = G_IMPLICIT_DEF - - bb.1: - %1:_(s32) = PHI %0, %bb.0, %1, %bb.1 - G_BR %bb.1 -... ---- -# CHECK: error: {{.*}}: TestIsSSAOverrideConflict has explicit property IsSSA, but is not valid SSA -name: TestIsSSAOverrideConflict -isSSA: true -body: | - bb.0: - %0:_(s32) = G_IMPLICIT_DEF - %0:_(s32) = G_IMPLICIT_DEF -... ---- -# CHECK: error: {{.*}}: TestNoVRegsOverrideConflict has explicit property NoVRegs, but contains virtual registers -name: TestNoVRegsOverrideConflict -noVRegs: true -body: | - bb.0: - %0:_(s32) = G_IMPLICIT_DEF -... From 53907ed5081b6cfde6cbe147ab06a074a4f3e0ed Mon Sep 17 00:00:00 2001 From: Benjamin Maxwell Date: Wed, 25 Sep 2024 09:50:55 +0100 Subject: [PATCH 125/212] [clang][codegen] Don't mark "int" TBAA on FP libcalls with indirect args (#108853) On some targets, an FP libcall with argument types such as long double will be lowered to pass arguments indirectly via pointers. When this is the case we should not mark the libcall with "int" TBAA as it may lead to incorrect optimizations. Currently, this can be seen for long doubles on x86_64-w64-mingw32. The `load x86_fp80` after the call is (incorrectly) marked with "int" TBAA (overwriting the previous metadata for "long double"). Nothing seems to break due to this currently as the metadata is being incorrectly placed on the load and not the call. But if the metadata is moved to the call (which this patch ensures), LLVM will optimize out the setup for the arguments. 
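A simplified sketch of the shape being guarded against (hypothetical caller; `powl` is the libm function named above, which on x86_64-w64-mingw32 receives its `long double` arguments and returns its result through pointers to stack temporaries):

    #include <cstdio>

    extern "C" long double powl(long double, long double); // libm; may set errno

    long double scale(long double a, long double b) {
      return powl(a, b); // lowered as x86_fp80 stores plus pointer arguments
    }

    int main() {
      std::printf("%Lg\n", scale(2.0L, 10.0L)); // expect 1024
      return 0;
    }

If the call itself carried "int" TBAA, alias analysis could conclude that the callee never touches the x86_fp80 temporaries, making the stores that initialize them dead; hence the new check only annotates libcalls whose arguments and result stay out of memory.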
--- clang/lib/CodeGen/CGBuiltin.cpp | 27 +- clang/lib/CodeGen/CGExpr.cpp | 6 +- clang/lib/CodeGen/CodeGenFunction.h | 3 +- .../math-libcalls-tbaa-indirect-args.c | 250 ++++++++++++++++++ 4 files changed, 280 insertions(+), 6 deletions(-) create mode 100644 clang/test/CodeGen/math-libcalls-tbaa-indirect-args.c diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 7bbf22569e73c..566252b263680 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -687,12 +687,31 @@ static Value *EmitSignBit(CodeGenFunction &CGF, Value *V) { return CGF.Builder.CreateICmpSLT(V, Zero); } +/// Checks no arguments or results are passed indirectly in the ABI (i.e. via a +/// hidden pointer). This is used to check annotating FP libcalls (that could +/// set `errno`) with "int" TBAA metadata is safe. If any floating-point +/// arguments are passed indirectly, setup for the call could be incorrectly +/// optimized out. +static bool HasNoIndirectArgumentsOrResults(CGFunctionInfo const &FnInfo) { + auto IsIndirect = [&](ABIArgInfo const &info) { + return info.isIndirect() || info.isIndirectAliased() || info.isInAlloca(); + }; + return !IsIndirect(FnInfo.getReturnInfo()) && + llvm::none_of(FnInfo.arguments(), + [&](CGFunctionInfoArgInfo const &ArgInfo) { + return IsIndirect(ArgInfo.info); + }); +} + static RValue emitLibraryCall(CodeGenFunction &CGF, const FunctionDecl *FD, const CallExpr *E, llvm::Constant *calleeValue) { CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E); CGCallee callee = CGCallee::forDirect(calleeValue, GlobalDecl(FD)); + llvm::CallBase *callOrInvoke = nullptr; + CGFunctionInfo const *FnInfo = nullptr; RValue Call = - CGF.EmitCall(E->getCallee()->getType(), callee, E, ReturnValueSlot()); + CGF.EmitCall(E->getCallee()->getType(), callee, E, ReturnValueSlot(), + /*Chain=*/nullptr, &callOrInvoke, &FnInfo); if (unsigned BuiltinID = FD->getBuiltinID()) { // Check whether a FP math builtin function, such as BI__builtin_expf @@ -702,12 +721,12 @@ static RValue emitLibraryCall(CodeGenFunction &CGF, const FunctionDecl *FD, // Restrict to target with errno, for example, MacOS doesn't set errno. // TODO: Support builtin function with complex type returned, eg: cacosh if (ConstWithoutErrnoAndExceptions && CGF.CGM.getLangOpts().MathErrno && - !CGF.Builder.getIsFPConstrained() && Call.isScalar()) { + !CGF.Builder.getIsFPConstrained() && Call.isScalar() && + HasNoIndirectArgumentsOrResults(*FnInfo)) { // Emit "int" TBAA metadata on FP math libcalls. clang::QualType IntTy = Context.IntTy; TBAAAccessInfo TBAAInfo = CGF.CGM.getTBAAAccessInfo(IntTy); - Instruction *Inst = cast(Call.getScalarVal()); - CGF.CGM.DecorateInstructionWithTBAA(Inst, TBAAInfo); + CGF.CGM.DecorateInstructionWithTBAA(callOrInvoke, TBAAInfo); } } return Call; diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp index 35b5daaf6d4b5..9166db4c74128 100644 --- a/clang/lib/CodeGen/CGExpr.cpp +++ b/clang/lib/CodeGen/CGExpr.cpp @@ -5932,7 +5932,8 @@ RValue CodeGenFunction::EmitCall(QualType CalleeType, const CGCallee &OrigCallee, const CallExpr *E, ReturnValueSlot ReturnValue, llvm::Value *Chain, - llvm::CallBase **CallOrInvoke) { + llvm::CallBase **CallOrInvoke, + CGFunctionInfo const **ResolvedFnInfo) { // Get the actual function type. The callee type will always be a pointer to // function type or a block pointer type. 
assert(CalleeType->isFunctionPointerType() && @@ -6111,6 +6112,9 @@ RValue CodeGenFunction::EmitCall(QualType CalleeType, const CGFunctionInfo &FnInfo = CGM.getTypes().arrangeFreeFunctionCall( Args, FnType, /*ChainCall=*/Chain); + if (ResolvedFnInfo) + *ResolvedFnInfo = &FnInfo; + // C99 6.5.2.2p6: // If the expression that denotes the called function has a type // that does not include a prototype, [the default argument diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h index d4e46f77868ec..8a1f6ff00ada7 100644 --- a/clang/lib/CodeGen/CodeGenFunction.h +++ b/clang/lib/CodeGen/CodeGenFunction.h @@ -4397,7 +4397,8 @@ class CodeGenFunction : public CodeGenTypeCache { } RValue EmitCall(QualType FnType, const CGCallee &Callee, const CallExpr *E, ReturnValueSlot ReturnValue, llvm::Value *Chain = nullptr, - llvm::CallBase **CallOrInvoke = nullptr); + llvm::CallBase **CallOrInvoke = nullptr, + CGFunctionInfo const **ResolvedFnInfo = nullptr); // If a Call or Invoke instruction was emitted for this CallExpr, this method // writes the pointer to `CallOrInvoke` if it's not null. diff --git a/clang/test/CodeGen/math-libcalls-tbaa-indirect-args.c b/clang/test/CodeGen/math-libcalls-tbaa-indirect-args.c new file mode 100644 index 0000000000000..b94f9641decc8 --- /dev/null +++ b/clang/test/CodeGen/math-libcalls-tbaa-indirect-args.c @@ -0,0 +1,250 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --filter "(@powl|@cargl|@ilogbl|!|load|store)" --version 5 +// RUN: %clang_cc1 %s -O3 -fmath-errno -emit-llvm -triple x86_64-unknown-unknown -o - | FileCheck %s -check-prefixes=CHECK +// RUN: %clang_cc1 %s -O3 -fmath-errno -emit-llvm -triple x86_64-pc-win64 -o - | FileCheck %s -check-prefixes=CHECK-WIN64 +// RUN: %clang_cc1 %s -O3 -fmath-errno -emit-llvm -triple i686-unknown-unknown -o - | FileCheck %s -check-prefixes=CHECK-I686 +// RUN: %clang_cc1 %s -O3 -fmath-errno -emit-llvm -triple powerpc-unknown-unknown -o - | FileCheck %s -check-prefixes=CHECK-PPC +// RUN: %clang_cc1 %s -O3 -fmath-errno -emit-llvm -triple armv7-none-linux-gnueabi -o - | FileCheck %s -check-prefixes=CHECK-ARM +// RUN: %clang_cc1 %s -O3 -fmath-errno -emit-llvm -triple armv7-none-linux-gnueabihf -o - | FileCheck %s -check-prefixes=CHECK-ARM-HF +// RUN: %clang_cc1 %s -O3 -fmath-errno -emit-llvm -triple thumbv7k-apple-watchos2.0 -o - -target-abi aapcs16 | FileCheck %s -check-prefixes=CHECK-THUMB +// RUN: %clang_cc1 %s -O3 -fmath-errno -emit-llvm -triple aarch64-unknown-unknown -o - | FileCheck %s -check-prefixes=CHECK-AARCH +// RUN: %clang_cc1 %s -O3 -fmath-errno -emit-llvm -triple spir -o - | FileCheck %s -check-prefixes=CHECK-SPIR +// RUN: %clang_cc1 %s -O3 -fmath-errno -emit-llvm -triple x86_64-w64-mingw32 -o - | FileCheck %s -check-prefixes=CHECK-MINGW32 + +// This file checks that if arguments/results are passed indirectly (i.e. via +// pointers), then the "int" TBAA metadata is not set on the FP libcall as this +// can lead to optimizations incorrectly optimizing out the setup for the call. 
+ +long double powl(long double a, long double b); + +// CHECK-LABEL: define dso_local x86_fp80 @test_powl( +// CHECK-SAME: x86_fp80 noundef [[A:%.*]], x86_fp80 noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +// CHECK: [[CALL:%.*]] = tail call x86_fp80 @powl(x86_fp80 noundef [[A]], x86_fp80 noundef [[B]]) #[[ATTR5:[0-9]+]], !tbaa [[TBAA2:![0-9]+]] +// +// CHECK-WIN64-LABEL: define dso_local x86_fp80 @test_powl( +// CHECK-WIN64-SAME: x86_fp80 noundef [[A:%.*]], x86_fp80 noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +// CHECK-WIN64: [[CALL:%.*]] = tail call x86_fp80 @powl(x86_fp80 noundef [[A]], x86_fp80 noundef [[B]]) #[[ATTR5:[0-9]+]], !tbaa [[TBAA2:![0-9]+]] +// +// CHECK-I686-LABEL: define dso_local x86_fp80 @test_powl( +// CHECK-I686-SAME: x86_fp80 noundef [[A:%.*]], x86_fp80 noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +// CHECK-I686: [[CALL:%.*]] = tail call x86_fp80 @powl(x86_fp80 noundef [[A]], x86_fp80 noundef [[B]]) #[[ATTR5:[0-9]+]], !tbaa [[TBAA3:![0-9]+]] +// +// CHECK-PPC-LABEL: define dso_local ppc_fp128 @test_powl( +// CHECK-PPC-SAME: ppc_fp128 noundef [[A:%.*]], ppc_fp128 noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +// CHECK-PPC: [[CALL:%.*]] = tail call ppc_fp128 @powl(ppc_fp128 noundef [[A]], ppc_fp128 noundef [[B]]) #[[ATTR3:[0-9]+]], !tbaa [[TBAA2:![0-9]+]] +// +// CHECK-ARM-LABEL: define dso_local double @test_powl( +// CHECK-ARM-SAME: double noundef [[A:%.*]], double noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +// CHECK-ARM: [[CALL:%.*]] = tail call double @powl(double noundef [[A]], double noundef [[B]]) #[[ATTR2:[0-9]+]], !tbaa [[TBAA3:![0-9]+]] +// +// CHECK-ARM-HF-LABEL: define dso_local double @test_powl( +// CHECK-ARM-HF-SAME: double noundef [[A:%.*]], double noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +// CHECK-ARM-HF: [[CALL:%.*]] = tail call double @powl(double noundef [[A]], double noundef [[B]]) #[[ATTR2:[0-9]+]], !tbaa [[TBAA3:![0-9]+]] +// +// CHECK-THUMB-LABEL: define double @test_powl( +// CHECK-THUMB-SAME: double noundef [[A:%.*]], double noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +// CHECK-THUMB: [[CALL:%.*]] = tail call double @powl(double noundef [[A]], double noundef [[B]]) #[[ATTR2:[0-9]+]], !tbaa [[TBAA3:![0-9]+]] +// +// CHECK-AARCH-LABEL: define dso_local fp128 @test_powl( +// CHECK-AARCH-SAME: fp128 noundef [[A:%.*]], fp128 noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +// CHECK-AARCH: [[CALL:%.*]] = tail call fp128 @powl(fp128 noundef [[A]], fp128 noundef [[B]]) #[[ATTR2:[0-9]+]], !tbaa [[TBAA2:![0-9]+]] +// +// CHECK-SPIR-LABEL: define dso_local spir_func double @test_powl( +// CHECK-SPIR-SAME: double noundef [[A:%.*]], double noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +// CHECK-SPIR: [[CALL:%.*]] = tail call spir_func double @powl(double noundef [[A]], double noundef [[B]]) #[[ATTR3:[0-9]+]], !tbaa [[TBAA2:![0-9]+]] +// +// CHECK-MINGW32-LABEL: define dso_local void @test_powl( +// CHECK-MINGW32-SAME: ptr dead_on_unwind noalias nocapture writable writeonly sret(x86_fp80) align 16 [[AGG_RESULT:%.*]], ptr nocapture noundef readonly [[TMP0:%.*]], ptr nocapture noundef readonly [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +// CHECK-MINGW32: [[A:%.*]] = load x86_fp80, ptr [[TMP0]], align 16, !tbaa [[TBAA3:![0-9]+]] +// CHECK-MINGW32: [[B:%.*]] = load x86_fp80, ptr [[TMP1]], align 16, !tbaa [[TBAA3]] +// CHECK-MINGW32: store x86_fp80 [[A]], ptr [[BYVAL_TEMP:%.*]], align 16, !tbaa [[TBAA3]] +// CHECK-MINGW32: store 
x86_fp80 [[B]], ptr [[BYVAL_TEMP1:%.*]], align 16, !tbaa [[TBAA3]] +// CHECK-MINGW32: call void @powl(ptr dead_on_unwind nonnull writable sret(x86_fp80) align 16 [[TMP:%.*]], ptr noundef nonnull [[BYVAL_TEMP]], ptr noundef nonnull [[BYVAL_TEMP1]]) #[[ATTR3:[0-9]+]] +// CHECK-MINGW32: [[TMP2:%.*]] = load x86_fp80, ptr [[TMP]], align 16, !tbaa [[TBAA3]] +// CHECK-MINGW32: store x86_fp80 [[TMP2]], ptr [[AGG_RESULT]], align 16, !tbaa [[TBAA3]] +// +long double test_powl(long double a, long double b) { + return powl(a, b); +} + +// CHECK-LABEL: define dso_local { x86_fp80, x86_fp80 } @test_cargl( +// CHECK-SAME: ptr nocapture noundef readonly byval({ x86_fp80, x86_fp80 }) align 16 [[CLD:%.*]]) local_unnamed_addr #[[ATTR2:[0-9]+]] { +// CHECK: [[CLD_REAL:%.*]] = load x86_fp80, ptr [[CLD]], align 16 +// CHECK: [[CLD_IMAG:%.*]] = load x86_fp80, ptr [[CLD_IMAGP:%.*]], align 16 +// CHECK: store x86_fp80 [[CLD_REAL]], ptr [[BYVAL_TEMP:%.*]], align 16 +// CHECK: store x86_fp80 [[CLD_IMAG]], ptr [[BYVAL_TEMP_IMAGP:%.*]], align 16 +// CHECK: [[CALL:%.*]] = tail call x86_fp80 @cargl(ptr noundef nonnull byval({ x86_fp80, x86_fp80 }) align 16 [[BYVAL_TEMP]]) #[[ATTR5]] +// +// CHECK-WIN64-LABEL: define dso_local { x86_fp80, x86_fp80 } @test_cargl( +// CHECK-WIN64-SAME: ptr nocapture noundef readonly byval({ x86_fp80, x86_fp80 }) align 16 [[CLD:%.*]]) local_unnamed_addr #[[ATTR2:[0-9]+]] { +// CHECK-WIN64: [[CLD_REAL:%.*]] = load x86_fp80, ptr [[CLD]], align 16 +// CHECK-WIN64: [[CLD_IMAG:%.*]] = load x86_fp80, ptr [[CLD_IMAGP:%.*]], align 16 +// CHECK-WIN64: store x86_fp80 [[CLD_REAL]], ptr [[BYVAL_TEMP:%.*]], align 16 +// CHECK-WIN64: store x86_fp80 [[CLD_IMAG]], ptr [[BYVAL_TEMP_IMAGP:%.*]], align 16 +// CHECK-WIN64: [[CALL:%.*]] = tail call x86_fp80 @cargl(ptr noundef nonnull byval({ x86_fp80, x86_fp80 }) align 16 [[BYVAL_TEMP]]) #[[ATTR5]] +// +// CHECK-I686-LABEL: define dso_local void @test_cargl( +// CHECK-I686-SAME: ptr dead_on_unwind noalias nocapture writable writeonly sret({ x86_fp80, x86_fp80 }) align 4 [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval({ x86_fp80, x86_fp80 }) align 4 [[CLD:%.*]]) local_unnamed_addr #[[ATTR2:[0-9]+]] { +// CHECK-I686: [[CLD_REAL:%.*]] = load x86_fp80, ptr [[CLD]], align 4 +// CHECK-I686: [[CLD_IMAG:%.*]] = load x86_fp80, ptr [[CLD_IMAGP:%.*]], align 4 +// CHECK-I686: store x86_fp80 [[CLD_REAL]], ptr [[BYVAL_TEMP:%.*]], align 4 +// CHECK-I686: store x86_fp80 [[CLD_IMAG]], ptr [[BYVAL_TEMP_IMAGP:%.*]], align 4 +// CHECK-I686: [[CALL:%.*]] = tail call x86_fp80 @cargl(ptr noundef nonnull byval({ x86_fp80, x86_fp80 }) align 4 [[BYVAL_TEMP]]) #[[ATTR5]] +// CHECK-I686: store x86_fp80 [[MUL_RL:%.*]], ptr [[AGG_RESULT]], align 4 +// CHECK-I686: store x86_fp80 [[MUL_IR:%.*]], ptr [[AGG_RESULT_IMAGP:%.*]], align 4 +// +// CHECK-PPC-LABEL: define dso_local void @test_cargl( +// CHECK-PPC-SAME: ptr dead_on_unwind noalias nocapture writable writeonly sret({ ppc_fp128, ppc_fp128 }) align 16 [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval({ ppc_fp128, ppc_fp128 }) align 16 [[CLD:%.*]]) local_unnamed_addr #[[ATTR1:[0-9]+]] { +// CHECK-PPC: [[CLD_REAL:%.*]] = load ppc_fp128, ptr [[CLD]], align 16 +// CHECK-PPC: [[CLD_IMAG:%.*]] = load ppc_fp128, ptr [[CLD_IMAGP:%.*]], align 16 +// CHECK-PPC: store ppc_fp128 [[CLD_REAL]], ptr [[BYVAL_TEMP:%.*]], align 16 +// CHECK-PPC: store ppc_fp128 [[CLD_IMAG]], ptr [[BYVAL_TEMP_IMAGP:%.*]], align 16 +// CHECK-PPC: [[CALL:%.*]] = tail call ppc_fp128 @cargl(ptr noundef nonnull byval({ ppc_fp128, ppc_fp128 }) align 16 
[[BYVAL_TEMP]]) #[[ATTR3]] +// CHECK-PPC: store ppc_fp128 [[MUL_RL:%.*]], ptr [[AGG_RESULT]], align 16 +// CHECK-PPC: store ppc_fp128 [[MUL_IR:%.*]], ptr [[AGG_RESULT_IMAGP:%.*]], align 16 +// +// CHECK-ARM-LABEL: define dso_local void @test_cargl( +// CHECK-ARM-SAME: ptr dead_on_unwind noalias nocapture writable writeonly sret({ double, double }) align 8 [[AGG_RESULT:%.*]], [2 x i64] noundef [[CLD_COERCE:%.*]]) local_unnamed_addr #[[ATTR1:[0-9]+]] { +// CHECK-ARM: [[CALL:%.*]] = tail call double @cargl([2 x i64] noundef [[CLD_COERCE]]) #[[ATTR2]], !tbaa [[TBAA3]] +// CHECK-ARM: store double [[MUL_RL:%.*]], ptr [[AGG_RESULT]], align 8 +// CHECK-ARM: store double [[MUL_IR:%.*]], ptr [[AGG_RESULT_IMAGP:%.*]], align 8 +// +// CHECK-ARM-HF-LABEL: define dso_local { double, double } @test_cargl( +// CHECK-ARM-HF-SAME: { double, double } noundef [[CLD_COERCE:%.*]]) local_unnamed_addr #[[ATTR1:[0-9]+]] { +// CHECK-ARM-HF: [[CALL:%.*]] = tail call double @cargl({ double, double } noundef [[CLD_COERCE]]) #[[ATTR2]], !tbaa [[TBAA3]] +// +// CHECK-THUMB-LABEL: define { double, double } @test_cargl( +// CHECK-THUMB-SAME: [2 x double] noundef [[CLD_COERCE:%.*]]) local_unnamed_addr #[[ATTR1:[0-9]+]] { +// CHECK-THUMB: [[CALL:%.*]] = tail call double @cargl([2 x double] noundef [[CLD_COERCE]]) #[[ATTR2]], !tbaa [[TBAA3]] +// +// CHECK-AARCH-LABEL: define dso_local { fp128, fp128 } @test_cargl( +// CHECK-AARCH-SAME: [2 x fp128] noundef alignstack(16) [[CLD_COERCE:%.*]]) local_unnamed_addr #[[ATTR1:[0-9]+]] { +// CHECK-AARCH: [[CALL:%.*]] = tail call fp128 @cargl([2 x fp128] noundef alignstack(16) [[CLD_COERCE]]) #[[ATTR2]], !tbaa [[TBAA2]] +// +// CHECK-SPIR-LABEL: define dso_local spir_func void @test_cargl( +// CHECK-SPIR-SAME: ptr dead_on_unwind noalias nocapture writable writeonly sret({ double, double }) align 8 [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval({ double, double }) align 8 [[CLD:%.*]]) local_unnamed_addr #[[ATTR1:[0-9]+]] { +// CHECK-SPIR: [[CLD_REAL:%.*]] = load double, ptr [[CLD]], align 8 +// CHECK-SPIR: [[CLD_IMAG:%.*]] = load double, ptr [[CLD_IMAGP:%.*]], align 8 +// CHECK-SPIR: store double [[CLD_REAL]], ptr [[BYVAL_TEMP:%.*]], align 8 +// CHECK-SPIR: store double [[CLD_IMAG]], ptr [[BYVAL_TEMP_IMAGP:%.*]], align 8 +// CHECK-SPIR: [[CALL:%.*]] = tail call spir_func double @cargl(ptr noundef nonnull byval({ double, double }) align 8 [[BYVAL_TEMP]]) #[[ATTR3]] +// CHECK-SPIR: store double [[MUL_RL:%.*]], ptr [[AGG_RESULT]], align 8 +// CHECK-SPIR: store double [[MUL_IR:%.*]], ptr [[AGG_RESULT_IMAGP:%.*]], align 8 +// +// CHECK-MINGW32-LABEL: define dso_local void @test_cargl( +// CHECK-MINGW32-SAME: ptr dead_on_unwind noalias nocapture writable writeonly sret({ x86_fp80, x86_fp80 }) align 16 [[AGG_RESULT:%.*]], ptr nocapture noundef readonly [[CLD:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-MINGW32: [[CLD_REAL:%.*]] = load x86_fp80, ptr [[CLD]], align 16 +// CHECK-MINGW32: [[CLD_IMAG:%.*]] = load x86_fp80, ptr [[CLD_IMAGP:%.*]], align 16 +// CHECK-MINGW32: store x86_fp80 [[CLD_REAL]], ptr [[BYVAL_TEMP:%.*]], align 16 +// CHECK-MINGW32: store x86_fp80 [[CLD_IMAG]], ptr [[BYVAL_TEMP_IMAGP:%.*]], align 16 +// CHECK-MINGW32: call void @cargl(ptr dead_on_unwind nonnull writable sret(x86_fp80) align 16 [[TMP:%.*]], ptr noundef nonnull [[BYVAL_TEMP]]) #[[ATTR3]] +// CHECK-MINGW32: [[TMP0:%.*]] = load x86_fp80, ptr [[TMP]], align 16, !tbaa [[TBAA3]] +// CHECK-MINGW32: [[CLD_REAL3:%.*]] = load x86_fp80, ptr [[CLD]], align 16 +// CHECK-MINGW32: [[CLD_IMAG5:%.*]] = load 
x86_fp80, ptr [[CLD_IMAGP]], align 16 +// CHECK-MINGW32: store x86_fp80 [[MUL_RL:%.*]], ptr [[AGG_RESULT]], align 16 +// CHECK-MINGW32: store x86_fp80 [[MUL_IR:%.*]], ptr [[AGG_RESULT_IMAGP:%.*]], align 16 +// +_Complex long double test_cargl(_Complex long double cld) { + long double v2 = __builtin_cargl(cld); + _Complex long double tmp = v2 * cld; + return tmp; +} + + +int ilogbl(long double a); + +// CHECK-LABEL: define dso_local i32 @test_ilogb( +// CHECK-SAME: x86_fp80 noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK: [[CALL:%.*]] = tail call i32 @ilogbl(x86_fp80 noundef [[A]]) #[[ATTR5]], !tbaa [[TBAA2]] +// +// CHECK-WIN64-LABEL: define dso_local i32 @test_ilogb( +// CHECK-WIN64-SAME: x86_fp80 noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-WIN64: [[CALL:%.*]] = tail call i32 @ilogbl(x86_fp80 noundef [[A]]) #[[ATTR5]], !tbaa [[TBAA2]] +// +// CHECK-I686-LABEL: define dso_local i32 @test_ilogb( +// CHECK-I686-SAME: x86_fp80 noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] { +// CHECK-I686: [[CALL:%.*]] = tail call i32 @ilogbl(x86_fp80 noundef [[A]]) #[[ATTR5]], !tbaa [[TBAA3]] +// +// CHECK-PPC-LABEL: define dso_local i32 @test_ilogb( +// CHECK-PPC-SAME: ppc_fp128 noundef [[A:%.*]]) local_unnamed_addr #[[ATTR1]] { +// CHECK-PPC: [[CALL:%.*]] = tail call i32 @ilogbl(ppc_fp128 noundef [[A]]) #[[ATTR3]], !tbaa [[TBAA2]] +// +// CHECK-ARM-LABEL: define dso_local i32 @test_ilogb( +// CHECK-ARM-SAME: double noundef [[A:%.*]]) local_unnamed_addr #[[ATTR1]] { +// CHECK-ARM: [[CALL:%.*]] = tail call i32 @ilogbl(double noundef [[A]]) #[[ATTR2]], !tbaa [[TBAA3]] +// +// CHECK-ARM-HF-LABEL: define dso_local i32 @test_ilogb( +// CHECK-ARM-HF-SAME: double noundef [[A:%.*]]) local_unnamed_addr #[[ATTR1]] { +// CHECK-ARM-HF: [[CALL:%.*]] = tail call i32 @ilogbl(double noundef [[A]]) #[[ATTR2]], !tbaa [[TBAA3]] +// +// CHECK-THUMB-LABEL: define i32 @test_ilogb( +// CHECK-THUMB-SAME: double noundef [[A:%.*]]) local_unnamed_addr #[[ATTR1]] { +// CHECK-THUMB: [[CALL:%.*]] = tail call i32 @ilogbl(double noundef [[A]]) #[[ATTR2]], !tbaa [[TBAA3]] +// +// CHECK-AARCH-LABEL: define dso_local i32 @test_ilogb( +// CHECK-AARCH-SAME: fp128 noundef [[A:%.*]]) local_unnamed_addr #[[ATTR1]] { +// CHECK-AARCH: [[CALL:%.*]] = tail call i32 @ilogbl(fp128 noundef [[A]]) #[[ATTR2]], !tbaa [[TBAA2]] +// +// CHECK-SPIR-LABEL: define dso_local spir_func i32 @test_ilogb( +// CHECK-SPIR-SAME: double noundef [[A:%.*]]) local_unnamed_addr #[[ATTR1]] { +// CHECK-SPIR: [[CALL:%.*]] = tail call spir_func i32 @ilogbl(double noundef [[A]]) #[[ATTR3]], !tbaa [[TBAA2]] +// +// CHECK-MINGW32-LABEL: define dso_local i32 @test_ilogb( +// CHECK-MINGW32-SAME: ptr nocapture noundef readonly [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-MINGW32: [[A:%.*]] = load x86_fp80, ptr [[TMP0]], align 16, !tbaa [[TBAA3]] +// CHECK-MINGW32: store x86_fp80 [[A]], ptr [[BYVAL_TEMP:%.*]], align 16, !tbaa [[TBAA3]] +// CHECK-MINGW32: [[CALL:%.*]] = call i32 @ilogbl(ptr noundef nonnull [[BYVAL_TEMP]]) #[[ATTR3]] +// +int test_ilogb(long double a) { + return ilogbl(a); +} +//. +// CHECK: [[TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0} +// CHECK: [[META3]] = !{!"int", [[META4:![0-9]+]], i64 0} +// CHECK: [[META4]] = !{!"omnipotent char", [[META5:![0-9]+]], i64 0} +// CHECK: [[META5]] = !{!"Simple C/C++ TBAA"} +//. 
+// CHECK-WIN64: [[TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0} +// CHECK-WIN64: [[META3]] = !{!"int", [[META4:![0-9]+]], i64 0} +// CHECK-WIN64: [[META4]] = !{!"omnipotent char", [[META5:![0-9]+]], i64 0} +// CHECK-WIN64: [[META5]] = !{!"Simple C/C++ TBAA"} +//. +// CHECK-I686: [[TBAA3]] = !{[[META4:![0-9]+]], [[META4]], i64 0} +// CHECK-I686: [[META4]] = !{!"int", [[META5:![0-9]+]], i64 0} +// CHECK-I686: [[META5]] = !{!"omnipotent char", [[META6:![0-9]+]], i64 0} +// CHECK-I686: [[META6]] = !{!"Simple C/C++ TBAA"} +//. +// CHECK-PPC: [[TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0} +// CHECK-PPC: [[META3]] = !{!"int", [[META4:![0-9]+]], i64 0} +// CHECK-PPC: [[META4]] = !{!"omnipotent char", [[META5:![0-9]+]], i64 0} +// CHECK-PPC: [[META5]] = !{!"Simple C/C++ TBAA"} +//. +// CHECK-ARM: [[TBAA3]] = !{[[META4:![0-9]+]], [[META4]], i64 0} +// CHECK-ARM: [[META4]] = !{!"int", [[META5:![0-9]+]], i64 0} +// CHECK-ARM: [[META5]] = !{!"omnipotent char", [[META6:![0-9]+]], i64 0} +// CHECK-ARM: [[META6]] = !{!"Simple C/C++ TBAA"} +//. +// CHECK-ARM-HF: [[TBAA3]] = !{[[META4:![0-9]+]], [[META4]], i64 0} +// CHECK-ARM-HF: [[META4]] = !{!"int", [[META5:![0-9]+]], i64 0} +// CHECK-ARM-HF: [[META5]] = !{!"omnipotent char", [[META6:![0-9]+]], i64 0} +// CHECK-ARM-HF: [[META6]] = !{!"Simple C/C++ TBAA"} +//. +// CHECK-THUMB: [[TBAA3]] = !{[[META4:![0-9]+]], [[META4]], i64 0} +// CHECK-THUMB: [[META4]] = !{!"int", [[META5:![0-9]+]], i64 0} +// CHECK-THUMB: [[META5]] = !{!"omnipotent char", [[META6:![0-9]+]], i64 0} +// CHECK-THUMB: [[META6]] = !{!"Simple C/C++ TBAA"} +//. +// CHECK-AARCH: [[TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0} +// CHECK-AARCH: [[META3]] = !{!"int", [[META4:![0-9]+]], i64 0} +// CHECK-AARCH: [[META4]] = !{!"omnipotent char", [[META5:![0-9]+]], i64 0} +// CHECK-AARCH: [[META5]] = !{!"Simple C/C++ TBAA"} +//. +// CHECK-SPIR: [[TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0} +// CHECK-SPIR: [[META3]] = !{!"int", [[META4:![0-9]+]], i64 0} +// CHECK-SPIR: [[META4]] = !{!"omnipotent char", [[META5:![0-9]+]], i64 0} +// CHECK-SPIR: [[META5]] = !{!"Simple C/C++ TBAA"} +//. +// CHECK-MINGW32: [[TBAA3]] = !{[[META4:![0-9]+]], [[META4]], i64 0} +// CHECK-MINGW32: [[META4]] = !{!"long double", [[META5:![0-9]+]], i64 0} +// CHECK-MINGW32: [[META5]] = !{!"omnipotent char", [[META6:![0-9]+]], i64 0} +// CHECK-MINGW32: [[META6]] = !{!"Simple C/C++ TBAA"} +//. 
From 445d8b2d10b2bb9a5f50e3fe0671045acd309a04 Mon Sep 17 00:00:00 2001 From: CarolineConcatto Date: Wed, 25 Sep 2024 09:53:23 +0100 Subject: [PATCH 126/212] [Clang][LLVM][AArch64] Add intrinsic for LUTI4 SME2 instruction (#97755) This patch adds these intrinsics: // Variants are also available for: _s8 svuint8x4_t svluti4_zt_u8_x4(uint64_t zt0, svuint8x2_t zn) __arm_streaming __arm_in("zt0"); according to PR#324[1] [1]ARM-software/acle#324 --- clang/include/clang/Basic/arm_sme.td | 5 ++ .../acle_sme2_luti4_zt.c | 82 +++++++++++++++++++ .../aarch64-sme2-intrinsics/acle_sme2_imm.cpp | 5 ++ llvm/include/llvm/IR/IntrinsicsAArch64.td | 6 ++ .../Target/AArch64/AArch64ISelDAGToDAG.cpp | 19 ++++- .../lib/Target/AArch64/AArch64SMEInstrInfo.td | 2 +- .../AArch64/sme2-intrinsics-write-zt.ll | 17 ++++ 7 files changed, 132 insertions(+), 4 deletions(-) create mode 100644 clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti4_zt.c create mode 100644 llvm/test/CodeGen/AArch64/sme2-intrinsics-write-zt.ll diff --git a/clang/include/clang/Basic/arm_sme.td b/clang/include/clang/Basic/arm_sme.td index ae6b55e98827f..9c9f31f388406 100644 --- a/clang/include/clang/Basic/arm_sme.td +++ b/clang/include/clang/Basic/arm_sme.td @@ -817,4 +817,9 @@ multiclass ZAReadzArray{ defm SVREADZ_VG2 : ZAReadzArray<"2">; defm SVREADZ_VG4 : ZAReadzArray<"4">; + +let SMETargetGuard = "sme2,sme-lutv2" in { + def SVLUTI4_ZT_X4 : SInst<"svluti4_zt_{d}_x4", "4i2.u", "cUc", MergeNone, "aarch64_sme_luti4_zt_x4", [IsStreaming, IsInZT0], [ImmCheck<0, ImmCheck0_0>]>; +} + } // let SVETargetGuard = InvalidMode diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti4_zt.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti4_zt.c new file mode 100644 index 0000000000000..47f36a8b000c0 --- /dev/null +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti4_zt.c @@ -0,0 +1,82 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5 +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +bf16 -target-feature +sme -target-feature +sme2 -target-feature +sme-lutv2 -O2 -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -x c++ -triple aarch64-none-linux-gnu -target-feature +bf16 -target-feature +sme -target-feature +sme2 -target-feature +sme-lutv2 -O2 -Werror -Wall -emit-llvm -o - %s | FileCheck %s -check-prefix CHECK-CXX +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +bf16 -target-feature +sme -target-feature +sme2 -target-feature +sme-lutv2 -O2 -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -x c++ -triple aarch64-none-linux-gnu -target-feature +bf16 -target-feature +sme -target-feature +sme2 -target-feature +sme-lutv2 -O2 -Werror -Wall -emit-llvm -o - %s | FileCheck %s -check-prefix CHECK-CXX + +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +bf16 -target-feature +sme -target-feature +sme2 -target-feature +sme-lutv2 -O2 -S -Werror -Wall -o /dev/null %s +// REQUIRES: aarch64-registered-target + +#include + +// CHECK-LABEL: define dso_local @test_luti4_zt_u8_x4( +// CHECK-SAME: [[OP:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[OP]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[OP]], i64 16) +// CHECK-NEXT: [[TMP2:%.*]] = tail call { , , , } @llvm.aarch64.sme.luti4.zt.x4.nxv16i8(i32 0, 
[[TMP0]], [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP2]], 0 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP3]], i64 0) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP2]], 1 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 16) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP2]], 2 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 32) +// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP2]], 3 +// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP8]], [[TMP9]], i64 48) +// CHECK-NEXT: ret [[TMP10]] +// +// CHECK-CXX-LABEL: define dso_local @_Z19test_luti4_zt_u8_x411svuint8x2_t( +// CHECK-CXX-SAME: [[OP:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +// CHECK-CXX-NEXT: [[ENTRY:.*:]] +// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[OP]], i64 0) +// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[OP]], i64 16) +// CHECK-CXX-NEXT: [[TMP2:%.*]] = tail call { , , , } @llvm.aarch64.sme.luti4.zt.x4.nxv16i8(i32 0, [[TMP0]], [[TMP1]]) +// CHECK-CXX-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP2]], 0 +// CHECK-CXX-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP3]], i64 0) +// CHECK-CXX-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP2]], 1 +// CHECK-CXX-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 16) +// CHECK-CXX-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP2]], 2 +// CHECK-CXX-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 32) +// CHECK-CXX-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP2]], 3 +// CHECK-CXX-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP8]], [[TMP9]], i64 48) +// CHECK-CXX-NEXT: ret [[TMP10]] +// +svuint8x4_t test_luti4_zt_u8_x4(svuint8x2_t op) __arm_streaming __arm_in("zt0") { + return svluti4_zt_u8_x4(0, op); +} + +// CHECK-LABEL: define dso_local @test_luti4_zt_s8_x4( +// CHECK-SAME: [[OP:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[OP]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[OP]], i64 16) +// CHECK-NEXT: [[TMP2:%.*]] = tail call { , , , } @llvm.aarch64.sme.luti4.zt.x4.nxv16i8(i32 0, [[TMP0]], [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP2]], 0 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP3]], i64 0) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP2]], 1 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 16) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP2]], 2 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 32) +// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP2]], 3 +// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP8]], [[TMP9]], i64 48) +// CHECK-NEXT: ret [[TMP10]] +// +// CHECK-CXX-LABEL: define dso_local @_Z19test_luti4_zt_s8_x411svuint8x2_t( +// CHECK-CXX-SAME: [[OP:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-CXX-NEXT: [[ENTRY:.*:]] +// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[OP]], 
i64 0) +// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[OP]], i64 16) +// CHECK-CXX-NEXT: [[TMP2:%.*]] = tail call { , , , } @llvm.aarch64.sme.luti4.zt.x4.nxv16i8(i32 0, [[TMP0]], [[TMP1]]) +// CHECK-CXX-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP2]], 0 +// CHECK-CXX-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP3]], i64 0) +// CHECK-CXX-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP2]], 1 +// CHECK-CXX-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 16) +// CHECK-CXX-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP2]], 2 +// CHECK-CXX-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 32) +// CHECK-CXX-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP2]], 3 +// CHECK-CXX-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP8]], [[TMP9]], i64 48) +// CHECK-CXX-NEXT: ret [[TMP10]] +// +svint8x4_t test_luti4_zt_s8_x4(svuint8x2_t op) __arm_streaming __arm_in("zt0") { + return svluti4_zt_s8_x4(0, op); +} diff --git a/clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_imm.cpp b/clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_imm.cpp index 5de97649af5d3..d9bb6daf974d5 100644 --- a/clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_imm.cpp +++ b/clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_imm.cpp @@ -350,3 +350,8 @@ void test_svdot_multi_za32_bad_lane(uint32_t slice_base, svuint16_t z_u16, svsudot_lane_za32_s8_vg1x2(slice_base, z_s8x2, z_u8, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} svsudot_lane_za32_s8_vg1x4(slice_base, z_s8x4, z_u8, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} } + +void test_luti4_zt_x4(svuint8x2_t op) __arm_streaming __arm_in("zt0") { + // Check Zt tile 0 + svluti4_zt_u8_x4(1, op); // expected-error {{argument value 1 is outside the valid range [0, 0]}} +} diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td index 8ffa2d0878e11..bf11cb0ad9e0f 100644 --- a/llvm/include/llvm/IR/IntrinsicsAArch64.td +++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td @@ -3762,6 +3762,12 @@ let TargetPrefix = "aarch64" in { : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], [llvm_i32_ty, llvm_nxv16i8_ty, llvm_i32_ty], [ImmArg>, ImmArg>, IntrReadMem]>; + + def int_aarch64_sme_luti4_zt_x4 + : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], + [llvm_i32_ty, llvm_nxv16i8_ty, llvm_nxv16i8_ty], + [ImmArg>, IntrNoMem, IntrHasSideEffects]>; + } // SVE2.1 - ZIPQ1, ZIPQ2, UZPQ1, UZPQ2 diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp index 69806c9c3fdbf..ebd7f938289b5 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp @@ -401,7 +401,7 @@ class AArch64DAGToDAGISel : public SelectionDAGISel { } void SelectMultiVectorLuti(SDNode *Node, unsigned NumOutVecs, unsigned Opc, - uint32_t MaxImm); + uint32_t MaxImm, bool IsMultiVector = false); template bool SelectSMETileSlice(SDValue N, SDValue &Vector, SDValue &Offset) { @@ -1977,15 +1977,23 @@ void AArch64DAGToDAGISel::SelectFrintFromVT(SDNode *N, unsigned NumVecs, void AArch64DAGToDAGISel::SelectMultiVectorLuti(SDNode *Node, unsigned NumOutVecs, - unsigned Opc, uint32_t MaxImm) { + unsigned Opc, uint32_t MaxImm, + bool IsMultiVector) { if 
(ConstantSDNode *Imm = dyn_cast<ConstantSDNode>(Node->getOperand(4)))
     if (Imm->getZExtValue() > MaxImm)
       return;
 
   SDValue ZtValue;
+  SmallVector<SDValue> Ops;
   if (!ImmToReg(Node->getOperand(2), ZtValue))
     return;
-  SDValue Ops[] = {ZtValue, Node->getOperand(3), Node->getOperand(4)};
+  Ops.push_back(ZtValue);
+
+  if (IsMultiVector) {
+    Ops.push_back(createZMulTuple({Node->getOperand(3), Node->getOperand(4)}));
+  } else {
+    Ops.push_back(Node->getOperand(3));
+    Ops.push_back(Node->getOperand(4));
+  }
 
   SDLoc DL(Node);
   EVT VT = Node->getValueType(0);
@@ -5507,6 +5515,11 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
       SelectMultiVectorLuti(Node, 2, Opc, 3);
       return;
     }
+    case Intrinsic::aarch64_sme_luti4_zt_x4: {
+      // Does not have an immediate, but takes a 2-ZPR input.
+      SelectMultiVectorLuti(Node, 4, AArch64::LUTI4_4ZZT2Z, 0, true);
+      return;
+    }
     }
   } break;
   case ISD::INTRINSIC_WO_CHAIN: {
diff --git a/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td
index ebe4121c944b1..e2261694d658c 100644
--- a/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td
@@ -940,7 +940,7 @@ defm FAMIN_4Z4Z : sme2_fp_sve_destructive_vector_vg4_multi<"famin", 0b0010101>;
 
 let Predicates = [HasSME2, HasSME_LUTv2] in {
 defm MOVT : sme2_movt_zt_to_zt<"movt", 0b0011111>;
-def LUTI4_4ZZT2Z : sme2_luti4_vector_vg4<0b00, 0b00,"luti4">;
+def LUTI4_4ZZT2Z : sme2_luti4_vector_vg4<0b00, 0b00,"luti4">;
 } //[HasSME2, HasSME_LUTv2]
 
 let Predicates = [HasSME2p1, HasSME_LUTv2] in {
diff --git a/llvm/test/CodeGen/AArch64/sme2-intrinsics-write-zt.ll b/llvm/test/CodeGen/AArch64/sme2-intrinsics-write-zt.ll
new file mode 100644
index 0000000000000..778f31194baf4
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sme2-intrinsics-write-zt.ll
@@ -0,0 +1,17 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -verify-machineinstrs -force-streaming < %s | FileCheck %s
+
+target triple = "aarch64-linux"
+
+define {<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>} @test_luti4_zt_i8(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1) #0 {
+; CHECK-LABEL: test_luti4_zt_i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    luti4 { z0.b - z3.b }, zt0, { z0, z1 }
+; CHECK-NEXT:    ret
+  %res = call {<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>} @llvm.aarch64.sme.luti4.zt.x4.nxv16i8(i32 0, <vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1)
+  ret {<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>} %res
+}
+
+attributes #0 = { "target-features"="+sme2,+sme-lutv2"}

From 5a191e3cd904b5fee202569e54fe04f91f1a697f Mon Sep 17 00:00:00 2001
From: Alex Bradbury
Date: Wed, 25 Sep 2024 09:54:49 +0100
Subject: [PATCH 127/212] [RISCV][test] Correct +experimental-ztso attr to
 +ztso in test

This doesn't change the test output, but means it's now testing what
it's supposed to.
--- llvm/test/CodeGen/RISCV/atomicrmw-cond-sub-clamp.ll | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/test/CodeGen/RISCV/atomicrmw-cond-sub-clamp.ll b/llvm/test/CodeGen/RISCV/atomicrmw-cond-sub-clamp.ll index a9c8a4be7d2b4..b5e892c0ff6ac 100644 --- a/llvm/test/CodeGen/RISCV/atomicrmw-cond-sub-clamp.ll +++ b/llvm/test/CodeGen/RISCV/atomicrmw-cond-sub-clamp.ll @@ -3,13 +3,13 @@ ; RUN: | FileCheck -check-prefix=RV32I %s ; RUN: llc -mtriple=riscv32 -mattr=+a -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefix=RV32IA %s -; RUN: llc -mtriple=riscv32 -mattr=+a,+experimental-ztso -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv32 -mattr=+a,+ztso -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefix=RV32IA %s ; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefix=RV64I %s ; RUN: llc -mtriple=riscv64 -mattr=+a -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefix=RV64IA %s -; RUN: llc -mtriple=riscv64 -mattr=+a,+experimental-ztso -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv64 -mattr=+a,+ztso -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefix=RV64IA %s From f1bbabd6289a351430657a2eb3b57fffa8c6d248 Mon Sep 17 00:00:00 2001 From: David Green Date: Wed, 25 Sep 2024 10:03:39 +0100 Subject: [PATCH 128/212] [ARM] Lower arm_neon_vbsl to ARMISD::VBSP and fold (vbsl x, y, y) to y (#109761) This helps clean up the patterns a little and will help share combines on both the intrinsic and VBSP. A combine is then added to fold away the VBSP if both the selected operands are the same. --- llvm/lib/Target/ARM/ARMISelLowering.cpp | 9 +++++ llvm/lib/Target/ARM/ARMInstrNEON.td | 54 +++++++++++-------------- llvm/test/CodeGen/ARM/vbsl.ll | 5 +-- 3 files changed, 35 insertions(+), 33 deletions(-) diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp index a03928b618df0..f891aece26848 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -17653,6 +17653,11 @@ SDValue ARMTargetLowering::PerformIntrinsicCombine(SDNode *N, // No immediate versions of these to check for. break; + case Intrinsic::arm_neon_vbsl: { + SDLoc dl(N); + return DAG.getNode(ARMISD::VBSP, dl, N->getValueType(0), N->getOperand(1), + N->getOperand(2), N->getOperand(3)); + } case Intrinsic::arm_mve_vqdmlah: case Intrinsic::arm_mve_vqdmlash: case Intrinsic::arm_mve_vqrdmlah: @@ -19072,6 +19077,10 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N, return SDValue(); break; } + case ARMISD::VBSP: + if (N->getOperand(1) == N->getOperand(2)) + return N->getOperand(1); + return SDValue(); case ISD::INTRINSIC_VOID: case ISD::INTRINSIC_W_CHAIN: switch (N->getConstantOperandVal(1)) { diff --git a/llvm/lib/Target/ARM/ARMInstrNEON.td b/llvm/lib/Target/ARM/ARMInstrNEON.td index fcabc9076e4d3..48dcbdb137123 100644 --- a/llvm/lib/Target/ARM/ARMInstrNEON.td +++ b/llvm/lib/Target/ARM/ARMInstrNEON.td @@ -5524,26 +5524,23 @@ def : Pat<(v16i8 (vnotq QPR:$src)), // with different register constraints; it just inserts copies. // That is why pseudo VBSP implemented. Is is expanded later into // VBIT/VBIF/VBSL taking into account register constraints to avoid copies. 
-def VBSPd - : PseudoNeonI<(outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm), - IIC_VBINiD, "", - [(set DPR:$Vd, - (v2i32 (NEONvbsp DPR:$src1, DPR:$Vn, DPR:$Vm)))]>; +def VBSPd : PseudoNeonI<(outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm), + IIC_VBINiD, "", []>; let Predicates = [HasNEON] in { -def : Pat<(v8i8 (int_arm_neon_vbsl (v8i8 DPR:$src1), - (v8i8 DPR:$Vn), (v8i8 DPR:$Vm))), +def : Pat<(v8i8 (NEONvbsp (v8i8 DPR:$src1), + (v8i8 DPR:$Vn), (v8i8 DPR:$Vm))), (VBSPd DPR:$src1, DPR:$Vn, DPR:$Vm)>; -def : Pat<(v4i16 (int_arm_neon_vbsl (v4i16 DPR:$src1), - (v4i16 DPR:$Vn), (v4i16 DPR:$Vm))), +def : Pat<(v4i16 (NEONvbsp (v4i16 DPR:$src1), + (v4i16 DPR:$Vn), (v4i16 DPR:$Vm))), (VBSPd DPR:$src1, DPR:$Vn, DPR:$Vm)>; -def : Pat<(v2i32 (int_arm_neon_vbsl (v2i32 DPR:$src1), - (v2i32 DPR:$Vn), (v2i32 DPR:$Vm))), +def : Pat<(v2i32 (NEONvbsp (v2i32 DPR:$src1), + (v2i32 DPR:$Vn), (v2i32 DPR:$Vm))), (VBSPd DPR:$src1, DPR:$Vn, DPR:$Vm)>; -def : Pat<(v2f32 (int_arm_neon_vbsl (v2f32 DPR:$src1), - (v2f32 DPR:$Vn), (v2f32 DPR:$Vm))), +def : Pat<(v2f32 (NEONvbsp (v2f32 DPR:$src1), + (v2f32 DPR:$Vn), (v2f32 DPR:$Vm))), (VBSPd DPR:$src1, DPR:$Vn, DPR:$Vm)>; -def : Pat<(v1i64 (int_arm_neon_vbsl (v1i64 DPR:$src1), - (v1i64 DPR:$Vn), (v1i64 DPR:$Vm))), +def : Pat<(v1i64 (NEONvbsp (v1i64 DPR:$src1), + (v1i64 DPR:$Vn), (v1i64 DPR:$Vm))), (VBSPd DPR:$src1, DPR:$Vn, DPR:$Vm)>; def : Pat<(v8i8 (or (and DPR:$Vn, DPR:$Vd), @@ -5560,26 +5557,23 @@ def : Pat<(v1i64 (or (and DPR:$Vn, DPR:$Vd), (VBSPd DPR:$Vd, DPR:$Vn, DPR:$Vm)>; } -def VBSPq - : PseudoNeonI<(outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm), - IIC_VBINiQ, "", - [(set QPR:$Vd, - (v4i32 (NEONvbsp QPR:$src1, QPR:$Vn, QPR:$Vm)))]>; +def VBSPq : PseudoNeonI<(outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm), + IIC_VBINiQ, "", []>; let Predicates = [HasNEON] in { -def : Pat<(v16i8 (int_arm_neon_vbsl (v16i8 QPR:$src1), - (v16i8 QPR:$Vn), (v16i8 QPR:$Vm))), +def : Pat<(v16i8 (NEONvbsp (v16i8 QPR:$src1), + (v16i8 QPR:$Vn), (v16i8 QPR:$Vm))), (VBSPq QPR:$src1, QPR:$Vn, QPR:$Vm)>; -def : Pat<(v8i16 (int_arm_neon_vbsl (v8i16 QPR:$src1), - (v8i16 QPR:$Vn), (v8i16 QPR:$Vm))), +def : Pat<(v8i16 (NEONvbsp (v8i16 QPR:$src1), + (v8i16 QPR:$Vn), (v8i16 QPR:$Vm))), (VBSPq QPR:$src1, QPR:$Vn, QPR:$Vm)>; -def : Pat<(v4i32 (int_arm_neon_vbsl (v4i32 QPR:$src1), - (v4i32 QPR:$Vn), (v4i32 QPR:$Vm))), +def : Pat<(v4i32 (NEONvbsp (v4i32 QPR:$src1), + (v4i32 QPR:$Vn), (v4i32 QPR:$Vm))), (VBSPq QPR:$src1, QPR:$Vn, QPR:$Vm)>; -def : Pat<(v4f32 (int_arm_neon_vbsl (v4f32 QPR:$src1), - (v4f32 QPR:$Vn), (v4f32 QPR:$Vm))), +def : Pat<(v4f32 (NEONvbsp (v4f32 QPR:$src1), + (v4f32 QPR:$Vn), (v4f32 QPR:$Vm))), (VBSPq QPR:$src1, QPR:$Vn, QPR:$Vm)>; -def : Pat<(v2i64 (int_arm_neon_vbsl (v2i64 QPR:$src1), - (v2i64 QPR:$Vn), (v2i64 QPR:$Vm))), +def : Pat<(v2i64 (NEONvbsp (v2i64 QPR:$src1), + (v2i64 QPR:$Vn), (v2i64 QPR:$Vm))), (VBSPq QPR:$src1, QPR:$Vn, QPR:$Vm)>; def : Pat<(v16i8 (or (and QPR:$Vn, QPR:$Vd), diff --git a/llvm/test/CodeGen/ARM/vbsl.ll b/llvm/test/CodeGen/ARM/vbsl.ll index d5aaf3e6f30bd..0ef725fc91b54 100644 --- a/llvm/test/CodeGen/ARM/vbsl.ll +++ b/llvm/test/CodeGen/ARM/vbsl.ll @@ -264,8 +264,7 @@ define <2 x i64> @test_vbslq_u64(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c) nounw define <8 x i8> @same_param_all(<8 x i8> %a, <8 x i8> %b) { ; CHECK-LABEL: same_param_all: ; CHECK: @ %bb.0: -; CHECK-NEXT: vorr d0, d1, d1 -; CHECK-NEXT: vbsl d0, d1, d1 +; CHECK-NEXT: vmov.f64 d0, d1 ; CHECK-NEXT: bx lr %vbsl.i = tail call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> %b, <8 x i8> %b, <8 x i8> 
%b) ret <8 x i8> %vbsl.i @@ -274,7 +273,7 @@ define <8 x i8> @same_param_12(<8 x i8> %a, <8 x i8> %b) { ; CHECK-LABEL: same_param_12: ; CHECK: @ %bb.0: -; CHECK-NEXT: vbsl d0, d1, d1 +; CHECK-NEXT: vmov.f64 d0, d1 ; CHECK-NEXT: bx lr %vbsl.i = tail call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %b) ret <8 x i8> %vbsl.i From bbb3679ad8e5a6d2ed6432a8171465eadf9f73f8 Mon Sep 17 00:00:00 2001 From: Vyacheslav Levytskyy Date: Wed, 25 Sep 2024 11:06:17 +0200 Subject: [PATCH 129/212] [SPIRV] Fix compilation error 'use of parameter from containing function' when building PR #106429 with gcc (#109924) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It appears that PR https://github.com/llvm/llvm-project/pull/106429 introduced an issue for builds with the SPIRV Backend target when building with gcc, e.g.: ``` /llvm-project/llvm/lib/Target/SPIRV/SPIRVUtils.cpp:263:36: error: use of parameter from containing function 263 | llvm::SyncScope::ID SubGroup = Ctx.getOrInsertSyncScopeID("subgroup"); | ^~~ /llvm-project/llvm/lib/Target/SPIRV/SPIRVUtils.cpp:256:46: note: ‘llvm::LLVMContext& Ctx’ declared here 256 | SPIRV::Scope::Scope getMemScope(LLVMContext &Ctx, SyncScope::ID Id) { ``` This PR fixes this by removing the struct and using static const variables instead. --- llvm/lib/Target/SPIRV/SPIRVUtils.cpp | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/llvm/lib/Target/SPIRV/SPIRVUtils.cpp b/llvm/lib/Target/SPIRV/SPIRVUtils.cpp index aec144f6f0586..a8016d42b0154 100644 --- a/llvm/lib/Target/SPIRV/SPIRVUtils.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVUtils.cpp @@ -254,26 +254,27 @@ SPIRV::MemorySemantics::MemorySemantics getMemSemantics(AtomicOrdering Ord) { } SPIRV::Scope::Scope getMemScope(LLVMContext &Ctx, SyncScope::ID Id) { - static const struct { - // Named by - // https://registry.khronos.org/SPIR-V/specs/unified1/SPIRV.html#_scope_id. - // We don't need aliases for Invocation and CrossDevice, as we already have - // them covered by "singlethread" and "" strings respectively (see - // implementation of LLVMContext::LLVMContext()). - llvm::SyncScope::ID SubGroup = Ctx.getOrInsertSyncScopeID("subgroup"); - llvm::SyncScope::ID WorkGroup = Ctx.getOrInsertSyncScopeID("workgroup"); - llvm::SyncScope::ID Device = Ctx.getOrInsertSyncScopeID("device"); - } SSIDs{}; + // Named by + // https://registry.khronos.org/SPIR-V/specs/unified1/SPIRV.html#_scope_id. + // We don't need aliases for Invocation and CrossDevice, as we already have + // them covered by "singlethread" and "" strings respectively (see + // implementation of LLVMContext::LLVMContext()).
+ static const llvm::SyncScope::ID SubGroup = + Ctx.getOrInsertSyncScopeID("subgroup"); + static const llvm::SyncScope::ID WorkGroup = + Ctx.getOrInsertSyncScopeID("workgroup"); + static const llvm::SyncScope::ID Device = + Ctx.getOrInsertSyncScopeID("device"); if (Id == llvm::SyncScope::SingleThread) return SPIRV::Scope::Invocation; else if (Id == llvm::SyncScope::System) return SPIRV::Scope::CrossDevice; - else if (Id == SSIDs.SubGroup) + else if (Id == SubGroup) return SPIRV::Scope::Subgroup; - else if (Id == SSIDs.WorkGroup) + else if (Id == WorkGroup) return SPIRV::Scope::Workgroup; - else if (Id == SSIDs.Device) + else if (Id == Device) return SPIRV::Scope::Device; return SPIRV::Scope::CrossDevice; } From 706821ba8ff9db829252581dd12d8c5ee2e7b3f0 Mon Sep 17 00:00:00 2001 From: David Spickett Date: Wed, 25 Sep 2024 10:05:33 +0100 Subject: [PATCH 130/212] [lldb][test] Skip TestConcurrentVFork on all Linux And given that it is only for Linux - effectively skip it, but in a way where we don't forget that it's Linux only. See https://github.com/llvm/llvm-project/issues/85084. This test times out on occasion on Arm, AArch64 and X86 Linux, which I saw just today in a buildkite build. Causing a failure that is 1. confusing because the PR wasn't for LLDB and 2. annoying to find in the giant log file (which isn't the test's fault, but it adds to the overhead). It's probably important to have this test running somewhere but right now it's causing too much noise to do so. --- .../fork/concurrent_vfork/TestConcurrentVFork.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/lldb/test/API/functionalities/fork/concurrent_vfork/TestConcurrentVFork.py b/lldb/test/API/functionalities/fork/concurrent_vfork/TestConcurrentVFork.py index dd9500c186b2c..3b5efb834b162 100644 --- a/lldb/test/API/functionalities/fork/concurrent_vfork/TestConcurrentVFork.py +++ b/lldb/test/API/functionalities/fork/concurrent_vfork/TestConcurrentVFork.py @@ -49,7 +49,7 @@ def follow_child_helper(self, use_fork, call_exec): @skipUnlessPlatform(["linux"]) # https://github.com/llvm/llvm-project/issues/85084. - @skipIf(oslist=["linux"], archs=["aarch64", "arm"]) + @skipIf(oslist=["linux"]) def test_follow_parent_vfork_no_exec(self): """ Make sure that debugging concurrent vfork() from multiple threads won't crash lldb during follow-parent. @@ -59,7 +59,7 @@ def test_follow_parent_vfork_no_exec(self): @skipUnlessPlatform(["linux"]) # https://github.com/llvm/llvm-project/issues/85084. - @skipIf(oslist=["linux"], archs=["aarch64", "arm"]) + @skipIf(oslist=["linux"]) def test_follow_parent_fork_no_exec(self): """ Make sure that debugging concurrent fork() from multiple threads won't crash lldb during follow-parent. @@ -69,7 +69,7 @@ def test_follow_parent_fork_no_exec(self): @skipUnlessPlatform(["linux"]) # https://github.com/llvm/llvm-project/issues/85084. - @skipIf(oslist=["linux"], archs=["aarch64", "arm"]) + @skipIf(oslist=["linux"]) def test_follow_parent_vfork_call_exec(self): """ Make sure that debugging concurrent vfork() from multiple threads won't crash lldb during follow-parent. @@ -79,7 +79,7 @@ def test_follow_parent_vfork_call_exec(self): @skipUnlessPlatform(["linux"]) # https://github.com/llvm/llvm-project/issues/85084. - @skipIf(oslist=["linux"], archs=["aarch64", "arm"]) + @skipIf(oslist=["linux"]) def test_follow_parent_fork_call_exec(self): """ Make sure that debugging concurrent vfork() from multiple threads won't crash lldb during follow-parent. 
@@ -89,7 +89,7 @@ def test_follow_parent_fork_call_exec(self): @skipUnlessPlatform(["linux"]) # https://github.com/llvm/llvm-project/issues/85084. - @skipIf(oslist=["linux"], archs=["aarch64", "arm"]) + @skipIf(oslist=["linux"]) def test_follow_child_vfork_no_exec(self): """ Make sure that debugging concurrent vfork() from multiple threads won't crash lldb during follow-child. @@ -99,7 +99,7 @@ def test_follow_child_vfork_no_exec(self): @skipUnlessPlatform(["linux"]) # https://github.com/llvm/llvm-project/issues/85084. - @skipIf(oslist=["linux"], archs=["aarch64", "arm"]) + @skipIf(oslist=["linux"]) def test_follow_child_fork_no_exec(self): """ Make sure that debugging concurrent fork() from multiple threads won't crash lldb during follow-child. @@ -109,7 +109,7 @@ def test_follow_child_fork_no_exec(self): @skipUnlessPlatform(["linux"]) # https://github.com/llvm/llvm-project/issues/85084. - @skipIf(oslist=["linux"], archs=["aarch64", "arm"]) + @skipIf(oslist=["linux"]) def test_follow_child_vfork_call_exec(self): """ Make sure that debugging concurrent vfork() from multiple threads won't crash lldb during follow-child. @@ -119,7 +119,7 @@ def test_follow_child_vfork_call_exec(self): @skipUnlessPlatform(["linux"]) # https://github.com/llvm/llvm-project/issues/85084. - @skipIf(oslist=["linux"], archs=["aarch64", "arm"]) + @skipIf(oslist=["linux"]) def test_follow_child_fork_call_exec(self): """ Make sure that debugging concurrent fork() from multiple threads won't crash lldb during follow-child. From c93e29439b1ab8ef6873c385f152a06e3395cb59 Mon Sep 17 00:00:00 2001 From: dlav-sc Date: Wed, 25 Sep 2024 12:13:40 +0300 Subject: [PATCH 131/212] [lldb] fix vFile:open, vFile:unlink error codes (#106950) This patch makes gdb-server send only GDB RSP-supported error codes during vFile:open and vFile:unlink handling.
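For illustration, a hedged sketch of the wire-level effect on a failing `vFile:open` (the packet bytes are illustrative rather than captured from a real session; the filename is hex-encoded `/no/such`, and 2 is ENOENT in the protocol's own numbering):

```
-> vFile:open:2f6e6f2f73756368,0,0
<- F-1,2    # errno mapped through system_errno_to_gdb() (GDB_ENOENT), not the raw host value
```

Previously the reply carried the host errno verbatim, which is only correct when the host and the File-I/O protocol happen to agree on the numbering.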
--- .../GDBRemoteCommunicationServerCommon.cpp | 29 +++++++++---------- 1 file changed, 14 insertions(+), 15 deletions(-) diff --git a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationServerCommon.cpp b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationServerCommon.cpp index f9d37490e16ae..324db3db7eb4c 100644 --- a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationServerCommon.cpp +++ b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationServerCommon.cpp @@ -496,6 +496,17 @@ GDBRemoteCommunicationServerCommon::Handle_qSpeedTest( return SendErrorResponse(7); } +static GDBErrno system_errno_to_gdb(int err) { + switch (err) { +#define HANDLE_ERRNO(name, value) \ + case name: \ + return GDB_##name; +#include "Plugins/Process/gdb-remote/GDBRemoteErrno.def" + default: + return GDB_EUNKNOWN; + } +} + GDBRemoteCommunication::PacketResult GDBRemoteCommunicationServerCommon::Handle_vFile_Open( StringExtractorGDBRemote &packet) { @@ -522,9 +533,7 @@ GDBRemoteCommunicationServerCommon::Handle_vFile_Open( } else { response.PutCString("-1"); std::error_code code = errorToErrorCode(file.takeError()); - if (code.category() == std::system_category()) { - response.Printf(",%x", code.value()); - } + response.Printf(",%x", system_errno_to_gdb(code.value())); } return SendPacketNoLock(response.GetString()); @@ -534,17 +543,6 @@ GDBRemoteCommunicationServerCommon::Handle_vFile_Open( return SendErrorResponse(18); } -static GDBErrno system_errno_to_gdb(int err) { - switch (err) { -#define HANDLE_ERRNO(name, value) \ - case name: \ - return GDB_##name; -#include "Plugins/Process/gdb-remote/GDBRemoteErrno.def" - default: - return GDB_EUNKNOWN; - } -} - GDBRemoteCommunication::PacketResult GDBRemoteCommunicationServerCommon::Handle_vFile_Close( StringExtractorGDBRemote &packet) { @@ -727,7 +725,8 @@ GDBRemoteCommunicationServerCommon::Handle_vFile_unlink( packet.GetHexByteString(path); Status error(llvm::sys::fs::remove(path)); StreamString response; - response.Printf("F%x,%x", error.GetError(), error.GetError()); + response.Printf("F%x,%x", error.GetError(), + system_errno_to_gdb(error.GetError())); return SendPacketNoLock(response.GetString()); } From 4d459136f50bb931987fd7dbcd29564ee9cc13ae Mon Sep 17 00:00:00 2001 From: David Spickett Date: Wed, 25 Sep 2024 10:16:58 +0100 Subject: [PATCH 132/212] [llvm][docs] Update the project and runtimes lists (#109788) And add a note to explain which variable to prefer if the project can go in both. --------- Co-authored-by: Nikita Popov --- llvm/docs/CMake.rst | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/llvm/docs/CMake.rst b/llvm/docs/CMake.rst index b5adb22d8f33b..321bae48594cf 100644 --- a/llvm/docs/CMake.rst +++ b/llvm/docs/CMake.rst @@ -577,7 +577,12 @@ enabled sub-projects. Nearly all of these variable names begin with The full list is: - ``clang;clang-tools-extra;cross-project-tests;libc;libclc;lld;lldb;openmp;polly;pstl`` + ``bolt;clang;clang-tools-extra;compiler-rt;cross-project-tests;libc;libclc;lld;lldb;mlir;openmp;polly;pstl`` + + .. note:: + Some projects listed here can also go in ``LLVM_ENABLE_RUNTIMES``. They + should only appear in one of the two lists. If a project is a valid possibility + for both, prefer putting it in ``LLVM_ENABLE_RUNTIMES``. **LLVM_ENABLE_RTTI**:BOOL Build LLVM with run-time type information. Defaults to OFF. @@ -594,7 +599,7 @@ enabled sub-projects.
Nearly all of these variable names begin with The full list is: - ``compiler-rt;libc;libcxx;libcxxabi;libunwind;openmp`` + ``libc;libunwind;libcxxabi;pstl;libcxx;compiler-rt;openmp;llvm-libgcc;offload`` To enable all of them, use: From b7ea2643cefe1ad7b1b0feeea3ff64e458eef0a3 Mon Sep 17 00:00:00 2001 From: David Spickett Date: Wed, 25 Sep 2024 10:21:45 +0100 Subject: [PATCH 133/212] [llvm][docs] Fix RISCVUsage docs build error After changes in #109651. Warning, treated as error: /home/davspi01/work/open_source/llvm-project/llvm/docs/RISCVUsage.rst::Anonymous hyperlink mismatch: 1 references but 0 targets. In typical RST fashion, all that was missing was a space between the last word and the opening `<` of the link. --- llvm/docs/RISCVUsage.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/docs/RISCVUsage.rst b/llvm/docs/RISCVUsage.rst index 7bea64792d472..d22f642865bb3 100644 --- a/llvm/docs/RISCVUsage.rst +++ b/llvm/docs/RISCVUsage.rst @@ -154,7 +154,7 @@ on support follow. ``Za64rs`` Supported (`See note <#riscv-profiles-extensions-note>`__) ``Zaamo`` Assembly Support ``Zabha`` Supported - ``Zacas`` Supported (`See note<#riscv-zacas-note>`__) + ``Zacas`` Supported (`See note <#riscv-zacas-note>`__) ``Zalrsc`` Assembly Support ``Zama16b`` Supported (`See note <#riscv-profiles-extensions-note>`__) ``Zawrs`` Assembly Support From 02f46d7fb8b2a2434b1597fb96f65d7f82f3aeac Mon Sep 17 00:00:00 2001 From: Caroline Concatto Date: Wed, 25 Sep 2024 09:25:28 +0000 Subject: [PATCH 134/212] Revert "[Clang][LLVM][AArch64] Add intrinsic for LUTI4 SME2 instruction (#97755)" Reverting to fix a failing test in clang. This reverts commit 445d8b2d10b2bb9a5f50e3fe0671045acd309a04. --- clang/include/clang/Basic/arm_sme.td | 5 -- .../acle_sme2_luti4_zt.c | 82 ------------------- .../aarch64-sme2-intrinsics/acle_sme2_imm.cpp | 5 -- llvm/include/llvm/IR/IntrinsicsAArch64.td | 6 -- .../Target/AArch64/AArch64ISelDAGToDAG.cpp | 19 +---- .../lib/Target/AArch64/AArch64SMEInstrInfo.td | 2 +- .../AArch64/sme2-intrinsics-write-zt.ll | 17 ---- 7 files changed, 4 insertions(+), 132 deletions(-) delete mode 100644 clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti4_zt.c delete mode 100644 llvm/test/CodeGen/AArch64/sme2-intrinsics-write-zt.ll diff --git a/clang/include/clang/Basic/arm_sme.td b/clang/include/clang/Basic/arm_sme.td index 9c9f31f388406..ae6b55e98827f 100644 --- a/clang/include/clang/Basic/arm_sme.td +++ b/clang/include/clang/Basic/arm_sme.td @@ -817,9 +817,4 @@ multiclass ZAReadzArray{ defm SVREADZ_VG2 : ZAReadzArray<"2">; defm SVREADZ_VG4 : ZAReadzArray<"4">; - -let SMETargetGuard = "sme2,sme-lutv2" in { - def SVLUTI4_ZT_X4 : SInst<"svluti4_zt_{d}_x4", "4i2.u", "cUc", MergeNone, "aarch64_sme_luti4_zt_x4", [IsStreaming, IsInZT0], [ImmCheck<0, ImmCheck0_0>]>; -} - } // let SVETargetGuard = InvalidMode diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti4_zt.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti4_zt.c deleted file mode 100644 index 47f36a8b000c0..0000000000000 --- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti4_zt.c +++ /dev/null @@ -1,82 +0,0 @@ -// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5 -// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +bf16 -target-feature +sme -target-feature +sme2 -target-feature +sme-lutv2 -O2 -Werror -Wall -emit-llvm -o - %s | FileCheck %s -// RUN: %clang_cc1 -x c++ -triple aarch64-none-linux-gnu -target-feature +bf16
-target-feature +sme -target-feature +sme2 -target-feature +sme-lutv2 -O2 -Werror -Wall -emit-llvm -o - %s | FileCheck %s -check-prefix CHECK-CXX -// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +bf16 -target-feature +sme -target-feature +sme2 -target-feature +sme-lutv2 -O2 -Werror -Wall -emit-llvm -o - %s | FileCheck %s -// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -x c++ -triple aarch64-none-linux-gnu -target-feature +bf16 -target-feature +sme -target-feature +sme2 -target-feature +sme-lutv2 -O2 -Werror -Wall -emit-llvm -o - %s | FileCheck %s -check-prefix CHECK-CXX - -// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +bf16 -target-feature +sme -target-feature +sme2 -target-feature +sme-lutv2 -O2 -S -Werror -Wall -o /dev/null %s -// REQUIRES: aarch64-registered-target - -#include - -// CHECK-LABEL: define dso_local @test_luti4_zt_u8_x4( -// CHECK-SAME: [[OP:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[OP]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[OP]], i64 16) -// CHECK-NEXT: [[TMP2:%.*]] = tail call { , , , } @llvm.aarch64.sme.luti4.zt.x4.nxv16i8(i32 0, [[TMP0]], [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP2]], 0 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP3]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP2]], 1 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP2]], 2 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 32) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP2]], 3 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP8]], [[TMP9]], i64 48) -// CHECK-NEXT: ret [[TMP10]] -// -// CHECK-CXX-LABEL: define dso_local @_Z19test_luti4_zt_u8_x411svuint8x2_t( -// CHECK-CXX-SAME: [[OP:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { -// CHECK-CXX-NEXT: [[ENTRY:.*:]] -// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[OP]], i64 0) -// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[OP]], i64 16) -// CHECK-CXX-NEXT: [[TMP2:%.*]] = tail call { , , , } @llvm.aarch64.sme.luti4.zt.x4.nxv16i8(i32 0, [[TMP0]], [[TMP1]]) -// CHECK-CXX-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP2]], 0 -// CHECK-CXX-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP3]], i64 0) -// CHECK-CXX-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP2]], 1 -// CHECK-CXX-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 16) -// CHECK-CXX-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP2]], 2 -// CHECK-CXX-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 32) -// CHECK-CXX-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP2]], 3 -// CHECK-CXX-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP8]], [[TMP9]], i64 48) -// CHECK-CXX-NEXT: ret [[TMP10]] -// -svuint8x4_t test_luti4_zt_u8_x4(svuint8x2_t op) __arm_streaming __arm_in("zt0") { - return svluti4_zt_u8_x4(0, op); -} - -// CHECK-LABEL: define dso_local @test_luti4_zt_s8_x4( -// CHECK-SAME: [[OP:%.*]]) local_unnamed_addr #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// 
CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[OP]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[OP]], i64 16) -// CHECK-NEXT: [[TMP2:%.*]] = tail call { , , , } @llvm.aarch64.sme.luti4.zt.x4.nxv16i8(i32 0, [[TMP0]], [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP2]], 0 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP3]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP2]], 1 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP2]], 2 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 32) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP2]], 3 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP8]], [[TMP9]], i64 48) -// CHECK-NEXT: ret [[TMP10]] -// -// CHECK-CXX-LABEL: define dso_local @_Z19test_luti4_zt_s8_x411svuint8x2_t( -// CHECK-CXX-SAME: [[OP:%.*]]) local_unnamed_addr #[[ATTR0]] { -// CHECK-CXX-NEXT: [[ENTRY:.*:]] -// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[OP]], i64 0) -// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[OP]], i64 16) -// CHECK-CXX-NEXT: [[TMP2:%.*]] = tail call { , , , } @llvm.aarch64.sme.luti4.zt.x4.nxv16i8(i32 0, [[TMP0]], [[TMP1]]) -// CHECK-CXX-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP2]], 0 -// CHECK-CXX-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP3]], i64 0) -// CHECK-CXX-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP2]], 1 -// CHECK-CXX-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 16) -// CHECK-CXX-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP2]], 2 -// CHECK-CXX-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 32) -// CHECK-CXX-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP2]], 3 -// CHECK-CXX-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP8]], [[TMP9]], i64 48) -// CHECK-CXX-NEXT: ret [[TMP10]] -// -svint8x4_t test_luti4_zt_s8_x4(svuint8x2_t op) __arm_streaming __arm_in("zt0") { - return svluti4_zt_s8_x4(0, op); -} diff --git a/clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_imm.cpp b/clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_imm.cpp index d9bb6daf974d5..5de97649af5d3 100644 --- a/clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_imm.cpp +++ b/clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_imm.cpp @@ -350,8 +350,3 @@ void test_svdot_multi_za32_bad_lane(uint32_t slice_base, svuint16_t z_u16, svsudot_lane_za32_s8_vg1x2(slice_base, z_s8x2, z_u8, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} svsudot_lane_za32_s8_vg1x4(slice_base, z_s8x4, z_u8, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} } - -void test_luti4_zt_x4(svuint8x2_t op) __arm_streaming __arm_in("zt0") { - // Check Zt tile 0 - svluti4_zt_u8_x4(1, op); // expected-error {{argument value 1 is outside the valid range [0, 0]}} -} diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td index bf11cb0ad9e0f..8ffa2d0878e11 100644 --- a/llvm/include/llvm/IR/IntrinsicsAArch64.td +++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td @@ -3762,12 +3762,6 @@ let TargetPrefix = "aarch64" in { : 
DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], [llvm_i32_ty, llvm_nxv16i8_ty, llvm_i32_ty], [ImmArg>, ImmArg>, IntrReadMem]>; - - def int_aarch64_sme_luti4_zt_x4 - : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], - [llvm_i32_ty, llvm_nxv16i8_ty, llvm_nxv16i8_ty], - [ImmArg>, IntrNoMem, IntrHasSideEffects]>; - } // SVE2.1 - ZIPQ1, ZIPQ2, UZPQ1, UZPQ2 diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp index ebd7f938289b5..69806c9c3fdbf 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp @@ -401,7 +401,7 @@ class AArch64DAGToDAGISel : public SelectionDAGISel { } void SelectMultiVectorLuti(SDNode *Node, unsigned NumOutVecs, unsigned Opc, - uint32_t MaxImm, bool IsMultiVector = false); + uint32_t MaxImm); template bool SelectSMETileSlice(SDValue N, SDValue &Vector, SDValue &Offset) { @@ -1977,23 +1977,15 @@ void AArch64DAGToDAGISel::SelectFrintFromVT(SDNode *N, unsigned NumVecs, void AArch64DAGToDAGISel::SelectMultiVectorLuti(SDNode *Node, unsigned NumOutVecs, - unsigned Opc, uint32_t MaxImm, - bool IsMultiVector) { + unsigned Opc, uint32_t MaxImm) { if (ConstantSDNode *Imm = dyn_cast(Node->getOperand(4))) if (Imm->getZExtValue() > MaxImm) return; SDValue ZtValue; - SmallVector Ops; if (!ImmToReg(Node->getOperand(2), ZtValue)) return; - Ops.push_back(ZtValue); - if (IsMultiVector) { - Ops.push_back(createZMulTuple({Node->getOperand(3), Node->getOperand(4)})); - } else { - Ops.push_back(Node->getOperand(3)); - Ops.push_back(Node->getOperand(4)); - } + SDValue Ops[] = {ZtValue, Node->getOperand(3), Node->getOperand(4)}; SDLoc DL(Node); EVT VT = Node->getValueType(0); @@ -5515,11 +5507,6 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) { SelectMultiVectorLuti(Node, 2, Opc, 3); return; } - case Intrinsic::aarch64_sme_luti4_zt_x4: { - // Does not have immediate but it has 2ZPR input - SelectMultiVectorLuti(Node, 4, AArch64::LUTI4_4ZZT2Z, 0, true); - return; - } } } break; case ISD::INTRINSIC_WO_CHAIN: { diff --git a/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td index e2261694d658c..ebe4121c944b1 100644 --- a/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td @@ -940,7 +940,7 @@ defm FAMIN_4Z4Z : sme2_fp_sve_destructive_vector_vg4_multi<"famin", 0b0010101>; let Predicates = [HasSME2, HasSME_LUTv2] in { defm MOVT : sme2_movt_zt_to_zt<"movt", 0b0011111>; -def LUTI4_4ZZT2Z : sme2_luti4_vector_vg4<0b00, 0b00,"luti4">; +def LUTI4_4ZZT2Z : sme2_luti4_vector_vg4<0b00, 0b00,"luti4">; } //[HasSME2, HasSME_LUTv2] let Predicates = [HasSME2p1, HasSME_LUTv2] in { diff --git a/llvm/test/CodeGen/AArch64/sme2-intrinsics-write-zt.ll b/llvm/test/CodeGen/AArch64/sme2-intrinsics-write-zt.ll deleted file mode 100644 index 778f31194baf4..0000000000000 --- a/llvm/test/CodeGen/AArch64/sme2-intrinsics-write-zt.ll +++ /dev/null @@ -1,17 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 -; RUN: llc -verify-machineinstrs -force-streaming < %s | FileCheck %s - -target triple = "aarch64-linux" - -define {, , , } @test_luti4_zt_i8( %v0, %v1) #0 { -; CHECK-LABEL: test_luti4_zt_i8: -; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 -; CHECK-NEXT: luti4 { 
z0.b - z3.b }, zt0, { z0, z1 } -; CHECK-NEXT: ret - %res = call {, , , } @llvm.aarch64.sme.luti4.zt.x4.nxv16i8(i32 0, %v0, %v1) - ret {, , , } %res -} - -attributes #0 = { "target-features"="+sme2,+sme-lutv2"} From 5ee2deac0c3b85deaeb0031b4030db99d266abdc Mon Sep 17 00:00:00 2001 From: David Spickett Date: Wed, 25 Sep 2024 10:27:57 +0100 Subject: [PATCH 135/212] [lldb][AArch64][Linux] Add Floating Point Mode Register (#106695) Otherwise known as FEAT_FPMR. This register controls the behaviour of floating point operations. https://developer.arm.com/documentation/ddi0601/2024-06/AArch64-Registers/FPMR--Floating-point-Mode-Register As the current floating point register contexts are fixed size, this has been placed in a new set. Linux kernel patches have landed already, so you can cross check with those. To simplify testing we're not going to do any floating point operations, just read and write from the program and debugger to make sure each sees the other's values correctly. --- .../Python/lldbsuite/test/lldbtest.py | 3 + .../NativeRegisterContextLinux_arm64.cpp | 92 ++++++++++++++++++- .../Linux/NativeRegisterContextLinux_arm64.h | 12 +++ .../Utility/RegisterContextPOSIX_arm64.cpp | 4 + .../Utility/RegisterContextPOSIX_arm64.h | 1 + .../Utility/RegisterInfoPOSIX_arm64.cpp | 33 +++++++ .../Process/Utility/RegisterInfoPOSIX_arm64.h | 7 ++ lldb/test/API/linux/aarch64/fpmr/Makefile | 3 + .../aarch64/fpmr/TestAArch64LinuxFPMR.py | 58 ++++++++++++ lldb/test/API/linux/aarch64/fpmr/main.c | 41 +++++++++ 10 files changed, 252 insertions(+), 2 deletions(-) create mode 100644 lldb/test/API/linux/aarch64/fpmr/Makefile create mode 100644 lldb/test/API/linux/aarch64/fpmr/TestAArch64LinuxFPMR.py create mode 100644 lldb/test/API/linux/aarch64/fpmr/main.c diff --git a/lldb/packages/Python/lldbsuite/test/lldbtest.py b/lldb/packages/Python/lldbsuite/test/lldbtest.py index df5a110cb5b30..c6b7ce84109c0 100644 --- a/lldb/packages/Python/lldbsuite/test/lldbtest.py +++ b/lldb/packages/Python/lldbsuite/test/lldbtest.py @@ -1370,6 +1370,9 @@ def isAArch64PAuth(self): return True return self.isAArch64() and "paca" in self.getCPUInfo() + def isAArch64FPMR(self): + return self.isAArch64() and "fpmr" in self.getCPUInfo() + def isAArch64Windows(self): """Returns true if the architecture is AArch64 and platform windows.""" if self.getPlatform() == "windows": diff --git a/lldb/source/Plugins/Process/Linux/NativeRegisterContextLinux_arm64.cpp b/lldb/source/Plugins/Process/Linux/NativeRegisterContextLinux_arm64.cpp index 1dd4fd4135133..6056f3001fed6 100644 --- a/lldb/source/Plugins/Process/Linux/NativeRegisterContextLinux_arm64.cpp +++ b/lldb/source/Plugins/Process/Linux/NativeRegisterContextLinux_arm64.cpp @@ -60,10 +60,16 @@ #define NT_ARM_TAGGED_ADDR_CTRL 0x409 /* Tagged address control register */ #endif +#ifndef NT_ARM_FPMR +#define NT_ARM_FPMR 0x40e /* Floating point mode register */ +#endif + #define HWCAP_PACA (1 << 30) #define HWCAP2_MTE (1 << 18) +#define HWCAP2_FPMR (1UL << 48) + using namespace lldb; using namespace lldb_private; using namespace lldb_private::process_linux; @@ -139,8 +145,12 @@ NativeRegisterContextLinux::CreateHostNativeRegisterContextLinux( std::optional auxv_at_hwcap2 = process.GetAuxValue(AuxVector::AUXV_AT_HWCAP2); - if (auxv_at_hwcap2 && (*auxv_at_hwcap2 & HWCAP2_MTE)) - opt_regsets.Set(RegisterInfoPOSIX_arm64::eRegsetMaskMTE); + if (auxv_at_hwcap2) { + if (*auxv_at_hwcap2 & HWCAP2_MTE) + opt_regsets.Set(RegisterInfoPOSIX_arm64::eRegsetMaskMTE); + if (*auxv_at_hwcap2 & HWCAP2_FPMR) + 
opt_regsets.Set(RegisterInfoPOSIX_arm64::eRegsetMaskFPMR); + } opt_regsets.Set(RegisterInfoPOSIX_arm64::eRegsetMaskTLS); @@ -186,6 +196,7 @@ NativeRegisterContextLinux_arm64::NativeRegisterContextLinux_arm64( std::fill(m_zt_reg.begin(), m_zt_reg.end(), 0); m_mte_ctrl_reg = 0; + m_fpmr_reg = 0; // 16 is just a maximum value, query hardware for actual watchpoint count m_max_hwp_supported = 16; @@ -201,6 +212,7 @@ NativeRegisterContextLinux_arm64::NativeRegisterContextLinux_arm64( m_mte_ctrl_is_valid = false; m_tls_is_valid = false; m_zt_buffer_is_valid = false; + m_fpmr_is_valid = false; // SME adds the tpidr2 register m_tls_size = GetRegisterInfo().IsSSVEPresent() ? sizeof(m_tls_regs) @@ -413,6 +425,14 @@ NativeRegisterContextLinux_arm64::ReadRegister(const RegisterInfo *reg_info, assert(offset < GetSMEPseudoBufferSize()); src = (uint8_t *)GetSMEPseudoBuffer() + offset; } + } else if (IsFPMR(reg)) { + error = ReadFPMR(); + if (error.Fail()) + return error; + + offset = reg_info->byte_offset - GetRegisterInfo().GetFPMROffset(); + assert(offset < GetFPMRBufferSize()); + src = (uint8_t *)GetFPMRBuffer() + offset; } else return Status::FromErrorString( "failed - register wasn't recognized to be a GPR or an FPR, " @@ -626,6 +646,17 @@ Status NativeRegisterContextLinux_arm64::WriteRegister( } else return Status::FromErrorString( "Writing to SVG or SVCR is not supported."); + } else if (IsFPMR(reg)) { + error = ReadFPMR(); + if (error.Fail()) + return error; + + offset = reg_info->byte_offset - GetRegisterInfo().GetFPMROffset(); + assert(offset < GetFPMRBufferSize()); + dst = (uint8_t *)GetFPMRBuffer() + offset; + ::memcpy(dst, reg_value.GetBytes(), reg_info->byte_size); + + return WriteFPMR(); } return Status::FromErrorString("Failed to write register value"); @@ -640,6 +671,7 @@ enum RegisterSetType : uint32_t { TLS, SME, // ZA only, because SVCR and SVG are pseudo registers. SME2, // ZT only. + FPMR, }; static uint8_t *AddRegisterSetType(uint8_t *dst, @@ -720,6 +752,13 @@ NativeRegisterContextLinux_arm64::CacheAllRegisters(uint32_t &cached_size) { return error; } + if (GetRegisterInfo().IsFPMRPresent()) { + cached_size += sizeof(RegisterSetType) + GetFPMRBufferSize(); + error = ReadFPMR(); + if (error.Fail()) + return error; + } + // tpidr is always present but tpidr2 depends on SME. 
cached_size += sizeof(RegisterSetType) + GetTLSBufferSize(); error = ReadTLS(); @@ -823,6 +862,11 @@ Status NativeRegisterContextLinux_arm64::ReadAllRegisterValues( GetMTEControlSize()); } + if (GetRegisterInfo().IsFPMRPresent()) { + dst = AddSavedRegisters(dst, RegisterSetType::FPMR, GetFPMRBuffer(), + GetFPMRBufferSize()); + } + dst = AddSavedRegisters(dst, RegisterSetType::TLS, GetTLSBuffer(), GetTLSBufferSize()); @@ -971,6 +1015,11 @@ Status NativeRegisterContextLinux_arm64::WriteAllRegisterValues( GetZTBuffer(), &src, GetZTBufferSize(), m_zt_buffer_is_valid, std::bind(&NativeRegisterContextLinux_arm64::WriteZT, this)); break; + case RegisterSetType::FPMR: + error = RestoreRegisters( + GetFPMRBuffer(), &src, GetFPMRBufferSize(), m_fpmr_is_valid, + std::bind(&NativeRegisterContextLinux_arm64::WriteFPMR, this)); + break; } if (error.Fail()) @@ -1014,6 +1063,10 @@ bool NativeRegisterContextLinux_arm64::IsTLS(unsigned reg) const { return GetRegisterInfo().IsTLSReg(reg); } +bool NativeRegisterContextLinux_arm64::IsFPMR(unsigned reg) const { + return GetRegisterInfo().IsFPMRReg(reg); +} + llvm::Error NativeRegisterContextLinux_arm64::ReadHardwareDebugInfo() { if (!m_refresh_hwdebug_info) { return llvm::Error::success(); @@ -1161,6 +1214,7 @@ void NativeRegisterContextLinux_arm64::InvalidateAllRegisters() { m_mte_ctrl_is_valid = false; m_tls_is_valid = false; m_zt_buffer_is_valid = false; + m_fpmr_is_valid = false; // Update SVE and ZA registers in case there is change in configuration. ConfigureRegisterContext(); @@ -1440,6 +1494,40 @@ Status NativeRegisterContextLinux_arm64::WriteZT() { return WriteRegisterSet(&ioVec, GetZTBufferSize(), NT_ARM_ZT); } +Status NativeRegisterContextLinux_arm64::ReadFPMR() { + Status error; + + if (m_fpmr_is_valid) + return error; + + struct iovec ioVec; + ioVec.iov_base = GetFPMRBuffer(); + ioVec.iov_len = GetFPMRBufferSize(); + + error = ReadRegisterSet(&ioVec, GetFPMRBufferSize(), NT_ARM_FPMR); + + if (error.Success()) + m_fpmr_is_valid = true; + + return error; +} + +Status NativeRegisterContextLinux_arm64::WriteFPMR() { + Status error; + + error = ReadFPMR(); + if (error.Fail()) + return error; + + struct iovec ioVec; + ioVec.iov_base = GetFPMRBuffer(); + ioVec.iov_len = GetFPMRBufferSize(); + + m_fpmr_is_valid = false; + + return WriteRegisterSet(&ioVec, GetFPMRBufferSize(), NT_ARM_FPMR); +} + void NativeRegisterContextLinux_arm64::ConfigureRegisterContext() { // ConfigureRegisterContext gets called from InvalidateAllRegisters // on every stop and configures SVE vector length and whether we are in diff --git a/lldb/source/Plugins/Process/Linux/NativeRegisterContextLinux_arm64.h b/lldb/source/Plugins/Process/Linux/NativeRegisterContextLinux_arm64.h index 6df7c3beefb82..16190b5492582 100644 --- a/lldb/source/Plugins/Process/Linux/NativeRegisterContextLinux_arm64.h +++ b/lldb/source/Plugins/Process/Linux/NativeRegisterContextLinux_arm64.h @@ -84,6 +84,7 @@ class NativeRegisterContextLinux_arm64 bool m_sve_buffer_is_valid; bool m_mte_ctrl_is_valid; bool m_zt_buffer_is_valid; + bool m_fpmr_is_valid; bool m_sve_header_is_valid; bool m_za_buffer_is_valid; @@ -133,6 +134,8 @@ class NativeRegisterContextLinux_arm64 // SME2's ZT is a 512 bit register. std::array m_zt_reg; + uint64_t m_fpmr_reg; + bool IsGPR(unsigned reg) const; bool IsFPR(unsigned reg) const; @@ -174,11 +177,16 @@ class NativeRegisterContextLinux_arm64 // SVCR is a pseudo register and we do not allow writes to it. 
Status ReadSMEControl(); + Status ReadFPMR(); + + Status WriteFPMR(); + bool IsSVE(unsigned reg) const; bool IsSME(unsigned reg) const; bool IsPAuth(unsigned reg) const; bool IsMTE(unsigned reg) const; bool IsTLS(unsigned reg) const; + bool IsFPMR(unsigned reg) const; uint64_t GetSVERegVG() { return m_sve_header.vl / 8; } @@ -202,6 +210,8 @@ class NativeRegisterContextLinux_arm64 void *GetSVEBuffer() { return m_sve_ptrace_payload.data(); } + void *GetFPMRBuffer() { return &m_fpmr_reg; } + size_t GetSVEHeaderSize() { return sizeof(m_sve_header); } size_t GetPACMaskSize() { return sizeof(m_pac_mask); } @@ -222,6 +232,8 @@ class NativeRegisterContextLinux_arm64 size_t GetZTBufferSize() { return m_zt_reg.size(); } + size_t GetFPMRBufferSize() { return sizeof(m_fpmr_reg); } + llvm::Error ReadHardwareDebugInfo() override; llvm::Error WriteHardwareDebugRegs(DREGType hwbType) override; diff --git a/lldb/source/Plugins/Process/Utility/RegisterContextPOSIX_arm64.cpp b/lldb/source/Plugins/Process/Utility/RegisterContextPOSIX_arm64.cpp index 50e25568f2ae0..575e9c8c81cbf 100644 --- a/lldb/source/Plugins/Process/Utility/RegisterContextPOSIX_arm64.cpp +++ b/lldb/source/Plugins/Process/Utility/RegisterContextPOSIX_arm64.cpp @@ -59,6 +59,10 @@ bool RegisterContextPOSIX_arm64::IsMTE(unsigned reg) const { return m_register_info_up->IsMTEReg(reg); } +bool RegisterContextPOSIX_arm64::IsFPMR(unsigned reg) const { + return m_register_info_up->IsFPMRReg(reg); +} + RegisterContextPOSIX_arm64::RegisterContextPOSIX_arm64( lldb_private::Thread &thread, std::unique_ptr register_info) diff --git a/lldb/source/Plugins/Process/Utility/RegisterContextPOSIX_arm64.h b/lldb/source/Plugins/Process/Utility/RegisterContextPOSIX_arm64.h index b1226b25b4be1..35ad56c98a7ae 100644 --- a/lldb/source/Plugins/Process/Utility/RegisterContextPOSIX_arm64.h +++ b/lldb/source/Plugins/Process/Utility/RegisterContextPOSIX_arm64.h @@ -58,6 +58,7 @@ class RegisterContextPOSIX_arm64 : public lldb_private::RegisterContext { bool IsTLS(unsigned reg) const; bool IsSME(unsigned reg) const; bool IsMTE(unsigned reg) const; + bool IsFPMR(unsigned reg) const; bool IsSVEZ(unsigned reg) const { return m_register_info_up->IsSVEZReg(reg); } bool IsSVEP(unsigned reg) const { return m_register_info_up->IsSVEPReg(reg); } diff --git a/lldb/source/Plugins/Process/Utility/RegisterInfoPOSIX_arm64.cpp b/lldb/source/Plugins/Process/Utility/RegisterInfoPOSIX_arm64.cpp index 9f5872e5de7e9..f51a93e1b2dcb 100644 --- a/lldb/source/Plugins/Process/Utility/RegisterInfoPOSIX_arm64.cpp +++ b/lldb/source/Plugins/Process/Utility/RegisterInfoPOSIX_arm64.cpp @@ -94,6 +94,9 @@ static lldb_private::RegisterInfo g_register_infos_sme2[] = { {"zt0", nullptr, 64, 0, lldb::eEncodingVector, lldb::eFormatVectorOfUInt8, KIND_ALL_INVALID, nullptr, nullptr, nullptr}}; +static lldb_private::RegisterInfo g_register_infos_fpmr[] = { + DEFINE_EXTENSION_REG(fpmr)}; + // Number of register sets provided by this context. enum { k_num_gpr_registers = gpr_w28 - gpr_x0 + 1, @@ -105,6 +108,7 @@ enum { // SME2's ZT0 will also be added to this set if present. So this number is // only for SME1 registers. 
k_num_sme_register = 3, + k_num_fpmr_register = 1, k_num_register_sets_default = 2, k_num_register_sets = 3 }; @@ -214,6 +218,9 @@ static const lldb_private::RegisterSet g_reg_set_mte_arm64 = { static const lldb_private::RegisterSet g_reg_set_sme_arm64 = { "Scalable Matrix Extension Registers", "sme", k_num_sme_register, nullptr}; +static const lldb_private::RegisterSet g_reg_set_fpmr_arm64 = { + "Floating Point Mode Register", "fpmr", k_num_fpmr_register, nullptr}; + RegisterInfoPOSIX_arm64::RegisterInfoPOSIX_arm64( const lldb_private::ArchSpec &target_arch, lldb_private::Flags opt_regsets) : lldb_private::RegisterInfoAndSetInterface(target_arch), @@ -263,6 +270,9 @@ RegisterInfoPOSIX_arm64::RegisterInfoPOSIX_arm64( if (m_opt_regsets.AnySet(eRegsetMaskSSVE)) AddRegSetSME(m_opt_regsets.AnySet(eRegsetMaskZT)); + if (m_opt_regsets.AllSet(eRegsetMaskFPMR)) + AddRegSetFPMR(); + m_register_info_count = m_dynamic_reg_infos.size(); m_register_info_p = m_dynamic_reg_infos.data(); m_register_set_p = m_dynamic_reg_sets.data(); @@ -409,6 +419,21 @@ void RegisterInfoPOSIX_arm64::AddRegSetSME(bool has_zt) { m_dynamic_reg_infos[GetRegNumSVEVG()].invalidate_regs = vg_invalidates; } +void RegisterInfoPOSIX_arm64::AddRegSetFPMR() { + uint32_t fpmr_regnum = m_dynamic_reg_infos.size(); + m_fpmr_regnum_collection.push_back(fpmr_regnum); + m_dynamic_reg_infos.push_back(g_register_infos_fpmr[0]); + m_dynamic_reg_infos[fpmr_regnum].byte_offset = + m_dynamic_reg_infos[fpmr_regnum - 1].byte_offset + + m_dynamic_reg_infos[fpmr_regnum - 1].byte_size; + m_dynamic_reg_infos[fpmr_regnum].kinds[lldb::eRegisterKindLLDB] = fpmr_regnum; + + m_per_regset_regnum_range[m_register_set_count] = + std::make_pair(fpmr_regnum, fpmr_regnum + 1); + m_dynamic_reg_sets.push_back(g_reg_set_fpmr_arm64); + m_dynamic_reg_sets.back().registers = m_fpmr_regnum_collection.data(); +} + uint32_t RegisterInfoPOSIX_arm64::ConfigureVectorLengthSVE(uint32_t sve_vq) { // sve_vq contains SVE Quad vector length in context of AArch64 SVE. // SVE register infos if enabled cannot be disabled by selecting sve_vq = 0. 
@@ -532,6 +557,10 @@ bool RegisterInfoPOSIX_arm64::IsSMEReg(unsigned reg) const { return llvm::is_contained(m_sme_regnum_collection, reg); } +bool RegisterInfoPOSIX_arm64::IsFPMRReg(unsigned reg) const { + return llvm::is_contained(m_fpmr_regnum_collection, reg); +} + uint32_t RegisterInfoPOSIX_arm64::GetRegNumSVEZ0() const { return sve_z0; } uint32_t RegisterInfoPOSIX_arm64::GetRegNumSVEFFR() const { return sve_ffr; } @@ -561,3 +590,7 @@ uint32_t RegisterInfoPOSIX_arm64::GetTLSOffset() const { uint32_t RegisterInfoPOSIX_arm64::GetSMEOffset() const { return m_register_info_p[m_sme_regnum_collection[0]].byte_offset; } + +uint32_t RegisterInfoPOSIX_arm64::GetFPMROffset() const { + return m_register_info_p[m_fpmr_regnum_collection[0]].byte_offset; +} \ No newline at end of file diff --git a/lldb/source/Plugins/Process/Utility/RegisterInfoPOSIX_arm64.h b/lldb/source/Plugins/Process/Utility/RegisterInfoPOSIX_arm64.h index 3b8171042c732..16a951ef0935f 100644 --- a/lldb/source/Plugins/Process/Utility/RegisterInfoPOSIX_arm64.h +++ b/lldb/source/Plugins/Process/Utility/RegisterInfoPOSIX_arm64.h @@ -32,6 +32,7 @@ class RegisterInfoPOSIX_arm64 eRegsetMaskTLS = 16, eRegsetMaskZA = 32, eRegsetMaskZT = 64, + eRegsetMaskFPMR = 128, eRegsetMaskDynamic = ~1, }; @@ -110,6 +111,8 @@ class RegisterInfoPOSIX_arm64 void AddRegSetSME(bool has_zt); + void AddRegSetFPMR(); + uint32_t ConfigureVectorLengthSVE(uint32_t sve_vq); void ConfigureVectorLengthZA(uint32_t za_vq); @@ -128,6 +131,7 @@ class RegisterInfoPOSIX_arm64 bool IsPAuthPresent() const { return m_opt_regsets.AnySet(eRegsetMaskPAuth); } bool IsMTEPresent() const { return m_opt_regsets.AnySet(eRegsetMaskMTE); } bool IsTLSPresent() const { return m_opt_regsets.AnySet(eRegsetMaskTLS); } + bool IsFPMRPresent() const { return m_opt_regsets.AnySet(eRegsetMaskFPMR); } bool IsSVEReg(unsigned reg) const; bool IsSVEZReg(unsigned reg) const; @@ -139,6 +143,7 @@ class RegisterInfoPOSIX_arm64 bool IsSMEReg(unsigned reg) const; bool IsSMERegZA(unsigned reg) const; bool IsSMERegZT(unsigned reg) const; + bool IsFPMRReg(unsigned reg) const; uint32_t GetRegNumSVEZ0() const; uint32_t GetRegNumSVEFFR() const; @@ -150,6 +155,7 @@ class RegisterInfoPOSIX_arm64 uint32_t GetMTEOffset() const; uint32_t GetTLSOffset() const; uint32_t GetSMEOffset() const; + uint32_t GetFPMROffset() const; private: typedef std::map> @@ -181,6 +187,7 @@ class RegisterInfoPOSIX_arm64 std::vector m_mte_regnum_collection; std::vector m_tls_regnum_collection; std::vector m_sme_regnum_collection; + std::vector m_fpmr_regnum_collection; }; #endif diff --git a/lldb/test/API/linux/aarch64/fpmr/Makefile b/lldb/test/API/linux/aarch64/fpmr/Makefile new file mode 100644 index 0000000000000..10495940055b6 --- /dev/null +++ b/lldb/test/API/linux/aarch64/fpmr/Makefile @@ -0,0 +1,3 @@ +C_SOURCES := main.c + +include Makefile.rules diff --git a/lldb/test/API/linux/aarch64/fpmr/TestAArch64LinuxFPMR.py b/lldb/test/API/linux/aarch64/fpmr/TestAArch64LinuxFPMR.py new file mode 100644 index 0000000000000..5a3b8f501095e --- /dev/null +++ b/lldb/test/API/linux/aarch64/fpmr/TestAArch64LinuxFPMR.py @@ -0,0 +1,58 @@ +""" +Test lldb's ability to read and write the AArch64 FPMR register. 
+""" + +import lldb +from lldbsuite.test.decorators import * +from lldbsuite.test.lldbtest import * +from lldbsuite.test import lldbutil + + +class AArch64LinuxFPMR(TestBase): + NO_DEBUG_INFO_TESTCASE = True + + @skipUnlessArch("aarch64") + @skipUnlessPlatform(["linux"]) + def test_fpmr_register(self): + if not self.isAArch64FPMR(): + self.skipTest("FPMR must be present.") + + self.build() + self.runCmd("file " + self.getBuildArtifact("a.out"), CURRENT_EXECUTABLE_SET) + + lldbutil.run_break_set_by_file_and_line( + self, + "main.c", + line_number("main.c", "// Set break point at this line."), + num_expected_locations=1, + ) + + self.runCmd("run", RUN_SUCCEEDED) + + if self.process().GetState() == lldb.eStateExited: + self.fail("Test program failed to run.") + + self.expect( + "thread list", + STOPPED_DUE_TO_BREAKPOINT, + substrs=["stopped", "stop reason = breakpoint"], + ) + + # This has been set by the program. + expected_fpmr = (0b101010 << 32) | 0b101 + self.expect( + "register read --all", + substrs=["Floating Point Mode Register", f"fpmr = {expected_fpmr:#018x}"], + ) + + # Write a value for the program to find. Same fields but with bit values + # inverted. + new_fpmr = (0b010101 << 32) | 0b010 + self.runCmd(f"register write fpmr {new_fpmr:#x}") + + # This value should be saved and restored after expressions. + self.runCmd("p expr_func()") + self.expect("register read fpmr", substrs=[f"fpmr = {new_fpmr:#018x}"]) + + # 0 means the program found the new value in the sysreg as expected. + self.expect("continue", substrs=["exited with status = 0"]) diff --git a/lldb/test/API/linux/aarch64/fpmr/main.c b/lldb/test/API/linux/aarch64/fpmr/main.c new file mode 100644 index 0000000000000..bdb7d8f40b64d --- /dev/null +++ b/lldb/test/API/linux/aarch64/fpmr/main.c @@ -0,0 +1,41 @@ +#include +#include +#include + +#ifndef HWCAP2_FPMR +#define HWCAP2_FPMR (1UL << 48) +#endif + +uint64_t get_fpmr(void) { + uint64_t fpmr = 0; + __asm__ volatile("mrs %0, s3_3_c4_c4_2" : "=r"(fpmr)); + return fpmr; +} + +void set_fpmr(uint64_t value) { + __asm__ volatile("msr s3_3_c4_c4_2, %0" ::"r"(value)); +} + +// Set F8S1 (bits 0-2) and LSCALE2 (bits 37-32) (to prove we treat fpmr as 64 +// bit). +const uint64_t original_fpmr = (uint64_t)0b101010 << 32 | (uint64_t)0b101; + +void expr_func() { set_fpmr(original_fpmr); } + +int main(int argc, char *argv[]) { + if (!(getauxval(AT_HWCAP2) & HWCAP2_FPMR)) + return 1; + + // As FPMR controls a bunch of floating point options that are quite + // extensive, we're not going to run any floating point ops here. Instead just + // update the value from the debugger and check it from this program, and vice + // versa. + set_fpmr(original_fpmr); + + // Here the debugger checks it read back the value above, then writes in a new + // value. Note that the bits are flipped in the new value. + uint64_t new_fpmr = get_fpmr(); // Set break point at this line. + uint64_t expected_fpmr = ((uint64_t)0b010101 << 32) | (uint64_t)0b010; + + return new_fpmr == expected_fpmr ? 0 : 1; +} From 5a038230b0a61acb9ec6f6fdd380c7d3c8c3d673 Mon Sep 17 00:00:00 2001 From: David Spickett Date: Wed, 25 Sep 2024 10:34:21 +0100 Subject: [PATCH 136/212] [llvm][cmake] Error when a runtime is in LLVM_ENABLE_PROJECTS and LLVM_ENABLE_RUNTIMES (#109791) The documentation tells you not to do this: https://llvm.org/docs/CMake.html#llvm-related-variables But until now we did not enforce it. 
``` $ cmake ../llvm-project/llvm/ -DCMAKE_BUILD_TYPE=Release -DLLVM_ENABLE_PROJECTS="pstl" -DLLVM_ENABLE_RUNTIMES="libcxx;pstl" ``` ``` CMake Error at CMakeLists.txt:166 (message): Runtime project "pstl" found in LLVM_ENABLE_PROJECTS and LLVM_ENABLE_RUNTIMES. It must only appear in one of them and that one should almost always be LLVM_ENABLE_RUNTIMES. ``` --- llvm/CMakeLists.txt | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/llvm/CMakeLists.txt b/llvm/CMakeLists.txt index c637febce1c1f..330db65e85cab 100644 --- a/llvm/CMakeLists.txt +++ b/llvm/CMakeLists.txt @@ -161,6 +161,12 @@ foreach(proj IN LISTS LLVM_ENABLE_RUNTIMES) endif() endforeach() +foreach(proj IN LISTS LLVM_ENABLE_RUNTIMES) + if ("${proj}" IN_LIST LLVM_ENABLE_PROJECTS) + message(FATAL_ERROR "Runtime project \"${proj}\" found in LLVM_ENABLE_PROJECTS and LLVM_ENABLE_RUNTIMES. It must only appear in one of them and that one should almost always be LLVM_ENABLE_RUNTIMES.") + endif() +endforeach() + # Set a shorthand option to enable the GPU build of the 'libc' project. option(LIBC_GPU_BUILD "Enable the 'libc' project targeting the GPU" OFF) if(LIBC_GPU_BUILD) From c71b212285bd3b4ba3758d4db042a869f520862e Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Wed, 25 Sep 2024 11:52:35 +0400 Subject: [PATCH 137/212] ProfDataUtils: Avoid dyn_extract + assert (NFC) --- llvm/lib/IR/ProfDataUtils.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/llvm/lib/IR/ProfDataUtils.cpp b/llvm/lib/IR/ProfDataUtils.cpp index 992ce34e00034..4d2fbdde3f9f0 100644 --- a/llvm/lib/IR/ProfDataUtils.cpp +++ b/llvm/lib/IR/ProfDataUtils.cpp @@ -216,8 +216,7 @@ bool extractProfTotalWeight(const MDNode *ProfileData, uint64_t &TotalVal) { if (ProfDataName->getString() == "branch_weights") { unsigned Offset = getBranchWeightOffset(ProfileData); for (unsigned Idx = Offset; Idx < ProfileData->getNumOperands(); ++Idx) { - auto *V = mdconst::dyn_extract<ConstantInt>(ProfileData->getOperand(Idx)); - assert(V && "Malformed branch_weight in MD_prof node"); + auto *V = mdconst::extract<ConstantInt>(ProfileData->getOperand(Idx)); TotalVal += V->getValue().getZExtValue(); } return true; From 0ef24aa549536e65fc3b23c4d21b6b76190d416e Mon Sep 17 00:00:00 2001 From: Jonas Paulsson Date: Wed, 25 Sep 2024 12:12:27 +0200 Subject: [PATCH 138/212] Fix for logic in combineExtract() (#108208) A (csmith) test case appeared where combineExtract() crashed when the input vector was a bitcast into a vector of i1 elements. Fix this by adding a check with canTreatAsByteVector() before the call.
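The crashing shape, condensed from the test added below into a self-contained sketch (function and value names mirror that test; the explicit argument is new), is an extract from an i1 vector whose bits come straight from a bitcast; combineExtract() would previously try to reinterpret that source as a byte vector even though i1 elements do not map onto whole bytes:

```
define i32 @sketch(<2 x i64> %v) {
  %P = bitcast <2 x i64> %v to <128 x i1>   ; 128 bits reinterpreted as 128 x i1
  %E = extractelement <128 x i1> %P, i64 0
  %Res = zext i1 %E to i32
  ret i32 %Res
}
```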
--- .../Target/SystemZ/SystemZISelLowering.cpp | 5 +++-- .../SystemZ/DAGCombine_extract_vector_elt.ll | 20 +++++++++++++++++++ 2 files changed, 23 insertions(+), 2 deletions(-) create mode 100644 llvm/test/CodeGen/SystemZ/DAGCombine_extract_vector_elt.ll diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp index 3dabc5ef540cf..ba105c12bc4e9 100644 --- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -7361,8 +7361,9 @@ SDValue SystemZTargetLowering::combineEXTRACT_VECTOR_ELT( if (auto *IndexN = dyn_cast<ConstantSDNode>(N->getOperand(1))) { SDValue Op0 = N->getOperand(0); EVT VecVT = Op0.getValueType(); - return combineExtract(SDLoc(N), N->getValueType(0), VecVT, Op0, - IndexN->getZExtValue(), DCI, false); + if (canTreatAsByteVector(VecVT)) + return combineExtract(SDLoc(N), N->getValueType(0), VecVT, Op0, + IndexN->getZExtValue(), DCI, false); } return SDValue(); } diff --git a/llvm/test/CodeGen/SystemZ/DAGCombine_extract_vector_elt.ll b/llvm/test/CodeGen/SystemZ/DAGCombine_extract_vector_elt.ll new file mode 100644 index 0000000000000..d568af47dbafd --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/DAGCombine_extract_vector_elt.ll @@ -0,0 +1,20 @@ +; RUN: llc -mtriple=s390x-linux-gnu -mcpu=z16 < %s | FileCheck %s +; +; Check that DAGCombiner doesn't crash in SystemZ combineExtract() +; when handling EXTRACT_VECTOR_ELT with a vector of i1 elements. + +define i32 @fun(i32 %arg) { +; CHECK-LABEL: fun: +entry: + %cc = icmp eq i32 %arg, 0 + br label %loop + +loop: + %P = phi <128 x i1> [ zeroinitializer, %entry ], [ bitcast (<2 x i64> to <128 x i1>), %loop ] + br i1 %cc, label %exit, label %loop + +exit: + %E = extractelement <128 x i1> %P, i64 0 + %Res = zext i1 %E to i32 + ret i32 %Res +} From 0c31ea5a09d854d5891eac40629f6a17a66fdcf7 Mon Sep 17 00:00:00 2001 From: Paul Walker Date: Wed, 25 Sep 2024 11:19:05 +0100 Subject: [PATCH 139/212] [Clang][SME2] Use tuple result of SME builtins directly. (#109423) I missed a code path in PR #108008, so SME2/SVE2p1 builtins were converting their struct return type into a large vector, causing unnecessary casting via memory.
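To illustrate the shape of the redundancy, a simplified self-contained sketch (the callee is a hypothetical stand-in for any two-vector builtin, not an intrinsic taken from this patch):

```
declare { <vscale x 16 x i8>, <vscale x 16 x i8> } @multi_vec_builtin()
declare <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8>, <vscale x 16 x i8>, i64)
declare <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv32i8(<vscale x 32 x i8>, i64)

; Before: the struct result was widened into one <vscale x 32 x i8> value,
; only for the halves to be re-extracted (often via a stack temporary).
define <vscale x 16 x i8> @before() {
  %r = call { <vscale x 16 x i8>, <vscale x 16 x i8> } @multi_vec_builtin()
  %r0 = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } %r, 0
  %w = call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> poison, <vscale x 16 x i8> %r0, i64 0)
  %p0 = call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv32i8(<vscale x 32 x i8> %w, i64 0)
  ret <vscale x 16 x i8> %p0
}

; After: the struct result is consumed directly, with no wide temporary.
define <vscale x 16 x i8> @after() {
  %r = call { <vscale x 16 x i8>, <vscale x 16 x i8> } @multi_vec_builtin()
  %r0 = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } %r, 0
  ret <vscale x 16 x i8> %r0
}
```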
---
 clang/lib/CodeGen/CGBuiltin.cpp               |   66 +-
 clang/lib/CodeGen/CodeGenFunction.h           |    8 +-
 .../aarch64-sme2-intrinsics/acle_sme2_clamp.c |  528 +-----
 .../aarch64-sme2-intrinsics/acle_sme2_cvt.c   |  194 +-
 .../aarch64-sme2-intrinsics/acle_sme2_cvtl.c  |   18 +-
 .../acle_sme2_faminmax.c                      |  264 +--
 .../aarch64-sme2-intrinsics/acle_sme2_frint.c |  176 +-
 .../acle_sme2_luti2_lane_zt_x2.c              |  162 +-
 .../acle_sme2_luti2_lane_zt_x4.c              |  234 +--
 .../acle_sme2_luti4_lane_zt_x2.c              |  162 +-
 .../acle_sme2_luti4_lane_zt_x4.c              |  182 +-
 .../aarch64-sme2-intrinsics/acle_sme2_max.c   | 1056 +----------
 .../aarch64-sme2-intrinsics/acle_sme2_maxnm.c |  352 +---
 .../aarch64-sme2-intrinsics/acle_sme2_min.c   | 1056 +----------
 .../aarch64-sme2-intrinsics/acle_sme2_minnm.c |  352 +---
 .../aarch64-sme2-intrinsics/acle_sme2_read.c  | 1586 ++---------------
 .../acle_sme2_sqdmulh.c                       |  352 +---
 .../acle_sme2_unpkx2.c                        |  108 +-
 .../acle_sme2_unpkx4.c                        |  156 +-
 .../acle_sme2_vector_add.c                    |  352 +---
 .../acle_sme2_vector_rshl.c                   |  704 +-------
 .../acle_sme2_vector_selx2.c                  |  216 +--
 .../acle_sme2_vector_selx4.c                  |  312 +---
 .../acle_sme2_vector_uzpx2.c                  |  432 +----
 .../acle_sme2_vector_uzpx4.c                  |  624 +------
 .../acle_sme2_vector_zipx2.c                  |  432 +----
 .../acle_sme2_vector_zipx4.c                  |  624 +------
 .../acle_sme2p1_movaz.c                       | 1584 ++--------------
 .../acle_sve2p1_pext.c                        |  144 +-
 .../acle_sve2p1_while_x2.c                    |  576 ++----
 30 files changed, 1340 insertions(+), 11672 deletions(-)

diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 566252b263680..249aead33ad73 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -9880,6 +9880,22 @@ Value *CodeGenFunction::EmitSVEPredicateCast(Value *Pred,
   return C;
 }
 
+Value *CodeGenFunction::EmitSVEPredicateTupleCast(Value *PredTuple,
+                                                  llvm::StructType *Ty) {
+  if (PredTuple->getType() == Ty)
+    return PredTuple;
+
+  Value *Ret = llvm::PoisonValue::get(Ty);
+  for (unsigned I = 0; I < Ty->getNumElements(); ++I) {
+    Value *Pred = Builder.CreateExtractValue(PredTuple, I);
+    Pred = EmitSVEPredicateCast(
+        Pred, cast<llvm::ScalableVectorType>(Ty->getTypeAtIndex(I)));
+    Ret = Builder.CreateInsertValue(Ret, Pred, I);
+  }
+
+  return Ret;
+}
+
 Value *CodeGenFunction::EmitSVEGatherLoad(const SVETypeFlags &TypeFlags,
                                           SmallVectorImpl<Value *> &Ops,
                                           unsigned IntID) {
@@ -10386,41 +10402,6 @@ Value *CodeGenFunction::EmitSVETupleCreate(const SVETypeFlags &TypeFlags,
   return Tuple;
 }
 
-Value *CodeGenFunction::FormSVEBuiltinResult(Value *Call) {
-  // Multi-vector results should be broken up into a single (wide) result
-  // vector.
-  auto *StructTy = dyn_cast<StructType>(Call->getType());
-  if (!StructTy)
-    return Call;
-
-  auto *VTy = dyn_cast<ScalableVectorType>(StructTy->getTypeAtIndex(0U));
-  if (!VTy)
-    return Call;
-  unsigned N = StructTy->getNumElements();
-
-  // We may need to emit a cast to a svbool_t
-  bool IsPredTy = VTy->getElementType()->isIntegerTy(1);
-  unsigned MinElts = IsPredTy ?
16 : VTy->getMinNumElements(); - - ScalableVectorType *WideVTy = - ScalableVectorType::get(VTy->getElementType(), MinElts * N); - Value *Ret = llvm::PoisonValue::get(WideVTy); - for (unsigned I = 0; I < N; ++I) { - Value *SRet = Builder.CreateExtractValue(Call, I); - assert(SRet->getType() == VTy && "Unexpected type for result value"); - Value *Idx = ConstantInt::get(CGM.Int64Ty, I * MinElts); - - if (IsPredTy) - SRet = EmitSVEPredicateCast( - SRet, ScalableVectorType::get(Builder.getInt1Ty(), 16)); - - Ret = Builder.CreateInsertVector(WideVTy, Ret, SRet, Idx); - } - Call = Ret; - - return Call; -} - void CodeGenFunction::GetAArch64SVEProcessedOperands( unsigned BuiltinID, const CallExpr *E, SmallVectorImpl &Ops, SVETypeFlags TypeFlags) { @@ -10551,12 +10532,16 @@ Value *CodeGenFunction::EmitAArch64SVEBuiltinExpr(unsigned BuiltinID, getSVEOverloadTypes(TypeFlags, Ty, Ops)); Value *Call = Builder.CreateCall(F, Ops); + if (Call->getType() == Ty) + return Call; + // Predicate results must be converted to svbool_t. - if (auto PredTy = dyn_cast(Call->getType())) - if (PredTy->getScalarType()->isIntegerTy(1)) - Call = EmitSVEPredicateCast(Call, cast(Ty)); + if (auto PredTy = dyn_cast(Ty)) + return EmitSVEPredicateCast(Call, PredTy); + if (auto PredTupleTy = dyn_cast(Ty)) + return EmitSVEPredicateTupleCast(Call, PredTupleTy); - return FormSVEBuiltinResult(Call); + llvm_unreachable("unsupported element count!"); } switch (BuiltinID) { @@ -10888,9 +10873,8 @@ Value *CodeGenFunction::EmitAArch64SMEBuiltinExpr(unsigned BuiltinID, TypeFlags.isOverloadNone() ? CGM.getIntrinsic(Builtin->LLVMIntrinsic) : CGM.getIntrinsic(Builtin->LLVMIntrinsic, {getSVEType(TypeFlags)}); - Value *Call = Builder.CreateCall(F, Ops); - return FormSVEBuiltinResult(Call); + return Builder.CreateCall(F, Ops); } Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h index 8a1f6ff00ada7..3e2abbd9bc109 100644 --- a/clang/lib/CodeGen/CodeGenFunction.h +++ b/clang/lib/CodeGen/CodeGenFunction.h @@ -4646,6 +4646,8 @@ class CodeGenFunction : public CodeGenTypeCache { unsigned BuiltinID); llvm::Value *EmitSVEPredicateCast(llvm::Value *Pred, llvm::ScalableVectorType *VTy); + llvm::Value *EmitSVEPredicateTupleCast(llvm::Value *PredTuple, + llvm::StructType *Ty); llvm::Value *EmitSVEGatherLoad(const SVETypeFlags &TypeFlags, llvm::SmallVectorImpl &Ops, unsigned IntID); @@ -4670,12 +4672,6 @@ class CodeGenFunction : public CodeGenTypeCache { llvm::Value *EmitSVEStructStore(const SVETypeFlags &TypeFlags, SmallVectorImpl &Ops, unsigned IntID); - /// FormSVEBuiltinResult - Returns the struct of scalable vectors as a wider - /// vector. It extracts the scalable vector from the struct and inserts into - /// the wider vector. This avoids the error when allocating space in llvm - /// for struct of scalable vectors if a function returns struct. 
- llvm::Value *FormSVEBuiltinResult(llvm::Value *Call); - llvm::Value *EmitAArch64SVEBuiltinExpr(unsigned BuiltinID, const CallExpr *E); llvm::Value *EmitSMELd1St1(const SVETypeFlags &TypeFlags, diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_clamp.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_clamp.c index 9c639984305d1..1297185c4b50e 100644 --- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_clamp.c +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_clamp.c @@ -24,27 +24,13 @@ // CHECK-LABEL: @test_svclamp_single_s8_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sclamp.single.x2.nxv16i8( [[OP1_COERCE0:%.*]], [[OP1_COERCE1:%.*]], [[OP2:%.*]], [[OP3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z25test_svclamp_single_s8_x210svint8x2_tu10__SVInt8_tS0_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sclamp.single.x2.nxv16i8( [[OP1_COERCE0:%.*]], [[OP1_COERCE1:%.*]], [[OP2:%.*]], [[OP3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint8x2_t test_svclamp_single_s8_x2(svint8x2_t op1, svint8_t op2, svint8_t op3) __arm_streaming { return SVE_ACLE_FUNC(svclamp, _single_s8_x2, , )(op1, op2, op3); @@ -52,27 +38,13 @@ svint8x2_t test_svclamp_single_s8_x2(svint8x2_t op1, svint8_t op2, svint8_t op3) // CHECK-LABEL: @test_svclamp_single_s16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sclamp.single.x2.nxv8i16( [[OP1_COERCE0:%.*]], [[OP1_COERCE1:%.*]], [[OP2:%.*]], [[OP3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z26test_svclamp_single_s16_x211svint16x2_tu11__SVInt16_tS0_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } 
@llvm.aarch64.sve.sclamp.single.x2.nxv8i16( [[OP1_COERCE0:%.*]], [[OP1_COERCE1:%.*]], [[OP2:%.*]], [[OP3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint16x2_t test_svclamp_single_s16_x2(svint16x2_t op1, svint16_t op2, svint16_t op3) __arm_streaming { return SVE_ACLE_FUNC(svclamp, _single_s16_x2, , )(op1, op2, op3); @@ -80,27 +52,13 @@ svint16x2_t test_svclamp_single_s16_x2(svint16x2_t op1, svint16_t op2, svint16_t // CHECK-LABEL: @test_svclamp_single_s32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sclamp.single.x2.nxv4i32( [[OP1_COERCE0:%.*]], [[OP1_COERCE1:%.*]], [[OP2:%.*]], [[OP3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z26test_svclamp_single_s32_x211svint32x2_tu11__SVInt32_tS0_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sclamp.single.x2.nxv4i32( [[OP1_COERCE0:%.*]], [[OP1_COERCE1:%.*]], [[OP2:%.*]], [[OP3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint32x2_t test_svclamp_single_s32_x2(svint32x2_t op1, svint32_t op2, svint32_t op3) __arm_streaming { return SVE_ACLE_FUNC(svclamp, _single_s32_x2, , )(op1, op2, op3); @@ -108,27 +66,13 @@ svint32x2_t test_svclamp_single_s32_x2(svint32x2_t op1, svint32_t op2, svint32_t // CHECK-LABEL: @test_svclamp_single_s64_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sclamp.single.x2.nxv2i64( [[OP1_COERCE0:%.*]], [[OP1_COERCE1:%.*]], [[OP2:%.*]], [[OP3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: 
store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z26test_svclamp_single_s64_x211svint64x2_tu11__SVInt64_tS0_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sclamp.single.x2.nxv2i64( [[OP1_COERCE0:%.*]], [[OP1_COERCE1:%.*]], [[OP2:%.*]], [[OP3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint64x2_t test_svclamp_single_s64_x2(svint64x2_t op1, svint64_t op2, svint64_t op3) __arm_streaming { return SVE_ACLE_FUNC(svclamp, _single_s64_x2, , )(op1, op2, op3); @@ -139,35 +83,13 @@ svint64x2_t test_svclamp_single_s64_x2(svint64x2_t op1, svint64_t op2, svint64_t // CHECK-LABEL: @test_svclamp_single_s8_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sclamp.single.x4.nxv16i8( [[OP1_COERCE0:%.*]], [[OP1_COERCE1:%.*]], [[OP1_COERCE2:%.*]], [[OP1_COERCE3:%.*]], [[OP2:%.*]], [[OP3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z25test_svclamp_single_s8_x410svint8x4_tu10__SVInt8_tS0_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sclamp.single.x4.nxv16i8( [[OP1_COERCE0:%.*]], [[OP1_COERCE1:%.*]], [[OP1_COERCE2:%.*]], [[OP1_COERCE3:%.*]], [[OP2:%.*]], [[OP3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 
3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint8x4_t test_svclamp_single_s8_x4(svint8x4_t op1, svint8_t op2, svint8_t op3) __arm_streaming { return SVE_ACLE_FUNC(svclamp, _single_s8_x4, , )(op1, op2, op3); @@ -175,35 +97,13 @@ svint8x4_t test_svclamp_single_s8_x4(svint8x4_t op1, svint8_t op2, svint8_t op3) // CHECK-LABEL: @test_svclamp_single_s16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sclamp.single.x4.nxv8i16( [[OP1_COERCE0:%.*]], [[OP1_COERCE1:%.*]], [[OP1_COERCE2:%.*]], [[OP1_COERCE3:%.*]], [[OP2:%.*]], [[OP3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z26test_svclamp_single_s16_x411svint16x4_tu11__SVInt16_tS0_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sclamp.single.x4.nxv8i16( [[OP1_COERCE0:%.*]], [[OP1_COERCE1:%.*]], [[OP1_COERCE2:%.*]], [[OP1_COERCE3:%.*]], [[OP2:%.*]], [[OP3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint16x4_t test_svclamp_single_s16_x4(svint16x4_t op1, svint16_t op2, svint16_t op3) __arm_streaming { return SVE_ACLE_FUNC(svclamp, _single_s16_x4, , )(op1, op2, op3); @@ -211,35 +111,13 @@ svint16x4_t test_svclamp_single_s16_x4(svint16x4_t op1, svint16_t op2, svint16_t // CHECK-LABEL: @test_svclamp_single_s32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, 
align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sclamp.single.x4.nxv4i32( [[OP1_COERCE0:%.*]], [[OP1_COERCE1:%.*]], [[OP1_COERCE2:%.*]], [[OP1_COERCE3:%.*]], [[OP2:%.*]], [[OP3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z26test_svclamp_single_s32_x411svint32x4_tu11__SVInt32_tS0_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sclamp.single.x4.nxv4i32( [[OP1_COERCE0:%.*]], [[OP1_COERCE1:%.*]], [[OP1_COERCE2:%.*]], [[OP1_COERCE3:%.*]], [[OP2:%.*]], [[OP3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint32x4_t test_svclamp_single_s32_x4(svint32x4_t op1, svint32_t op2, svint32_t op3) __arm_streaming { return SVE_ACLE_FUNC(svclamp, _single_s32_x4, , )(op1, op2, op3); @@ -247,35 +125,13 @@ svint32x4_t test_svclamp_single_s32_x4(svint32x4_t op1, svint32_t op2, svint32_t // CHECK-LABEL: @test_svclamp_single_s64_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sclamp.single.x4.nxv2i64( [[OP1_COERCE0:%.*]], [[OP1_COERCE1:%.*]], [[OP1_COERCE2:%.*]], [[OP1_COERCE3:%.*]], [[OP2:%.*]], [[OP3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// 
CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z26test_svclamp_single_s64_x411svint64x4_tu11__SVInt64_tS0_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sclamp.single.x4.nxv2i64( [[OP1_COERCE0:%.*]], [[OP1_COERCE1:%.*]], [[OP1_COERCE2:%.*]], [[OP1_COERCE3:%.*]], [[OP2:%.*]], [[OP3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint64x4_t test_svclamp_single_s64_x4(svint64x4_t op1, svint64_t op2, svint64_t op3) __arm_streaming { return SVE_ACLE_FUNC(svclamp, _single_s64_x4, , )(op1, op2, op3); @@ -288,27 +144,13 @@ svint64x4_t test_svclamp_single_s64_x4(svint64x4_t op1, svint64_t op2, svint64_t // CHECK-LABEL: @test_svclamp_single_u8_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uclamp.single.x2.nxv16i8( [[OP1_COERCE0:%.*]], [[OP1_COERCE1:%.*]], [[OP2:%.*]], [[OP3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z25test_svclamp_single_u8_x211svuint8x2_tu11__SVUint8_tS0_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uclamp.single.x2.nxv16i8( [[OP1_COERCE0:%.*]], [[OP1_COERCE1:%.*]], [[OP2:%.*]], [[OP3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] 
= load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint8x2_t test_svclamp_single_u8_x2(svuint8x2_t op1, svuint8_t op2, svuint8_t op3) __arm_streaming { return SVE_ACLE_FUNC(svclamp, _single_u8_x2, , )(op1, op2, op3); @@ -316,27 +158,13 @@ svuint8x2_t test_svclamp_single_u8_x2(svuint8x2_t op1, svuint8_t op2, svuint8_t // CHECK-LABEL: @test_svclamp_single_u16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uclamp.single.x2.nxv8i16( [[OP1_COERCE0:%.*]], [[OP1_COERCE1:%.*]], [[OP2:%.*]], [[OP3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z26test_svclamp_single_u16_x212svuint16x2_tu12__SVUint16_tS0_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uclamp.single.x2.nxv8i16( [[OP1_COERCE0:%.*]], [[OP1_COERCE1:%.*]], [[OP2:%.*]], [[OP3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint16x2_t test_svclamp_single_u16_x2(svuint16x2_t op1, svuint16_t op2, svuint16_t op3) __arm_streaming { return SVE_ACLE_FUNC(svclamp, _single_u16_x2, , )(op1, op2, op3); @@ -344,27 +172,13 @@ svuint16x2_t test_svclamp_single_u16_x2(svuint16x2_t op1, svuint16_t op2, svuint // CHECK-LABEL: @test_svclamp_single_u32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uclamp.single.x2.nxv4i32( [[OP1_COERCE0:%.*]], [[OP1_COERCE1:%.*]], [[OP2:%.*]], [[OP3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z26test_svclamp_single_u32_x212svuint32x2_tu12__SVUint32_tS0_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uclamp.single.x2.nxv4i32( [[OP1_COERCE0:%.*]], [[OP1_COERCE1:%.*]], [[OP2:%.*]], [[OP3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] 
= extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint32x2_t test_svclamp_single_u32_x2(svuint32x2_t op1, svuint32_t op2, svuint32_t op3) __arm_streaming { return SVE_ACLE_FUNC(svclamp, _single_u32_x2, , )(op1, op2, op3); @@ -372,27 +186,13 @@ svuint32x2_t test_svclamp_single_u32_x2(svuint32x2_t op1, svuint32_t op2, svuint // CHECK-LABEL: @test_svclamp_single_u64_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uclamp.single.x2.nxv2i64( [[OP1_COERCE0:%.*]], [[OP1_COERCE1:%.*]], [[OP2:%.*]], [[OP3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z26test_svclamp_single_u64_x212svuint64x2_tu12__SVUint64_tS0_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uclamp.single.x2.nxv2i64( [[OP1_COERCE0:%.*]], [[OP1_COERCE1:%.*]], [[OP2:%.*]], [[OP3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint64x2_t test_svclamp_single_u64_x2(svuint64x2_t op1, svuint64_t op2, svuint64_t op3) __arm_streaming { return SVE_ACLE_FUNC(svclamp, _single_u64_x2, , )(op1, op2, op3); @@ -403,35 +203,13 @@ svuint64x2_t test_svclamp_single_u64_x2(svuint64x2_t op1, svuint64_t op2, svuint // CHECK-LABEL: @test_svclamp_single_u8_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uclamp.single.x4.nxv16i8( [[OP1_COERCE0:%.*]], [[OP1_COERCE1:%.*]], [[OP1_COERCE2:%.*]], [[OP1_COERCE3:%.*]], [[OP2:%.*]], [[OP3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] 
= tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z25test_svclamp_single_u8_x411svuint8x4_tu11__SVUint8_tS0_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uclamp.single.x4.nxv16i8( [[OP1_COERCE0:%.*]], [[OP1_COERCE1:%.*]], [[OP1_COERCE2:%.*]], [[OP1_COERCE3:%.*]], [[OP2:%.*]], [[OP3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint8x4_t test_svclamp_single_u8_x4(svuint8x4_t op1, svuint8_t op2, svuint8_t op3) __arm_streaming { return SVE_ACLE_FUNC(svclamp, _single_u8_x4, , )(op1, op2, op3); @@ -439,35 +217,13 @@ svuint8x4_t test_svclamp_single_u8_x4(svuint8x4_t op1, svuint8_t op2, svuint8_t // CHECK-LABEL: @test_svclamp_single_u16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uclamp.single.x4.nxv8i16( [[OP1_COERCE0:%.*]], [[OP1_COERCE1:%.*]], [[OP1_COERCE2:%.*]], [[OP1_COERCE3:%.*]], [[OP2:%.*]], [[OP3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z26test_svclamp_single_u16_x412svuint16x4_tu12__SVUint16_tS0_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uclamp.single.x4.nxv8i16( 
[[OP1_COERCE0:%.*]], [[OP1_COERCE1:%.*]], [[OP1_COERCE2:%.*]], [[OP1_COERCE3:%.*]], [[OP2:%.*]], [[OP3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint16x4_t test_svclamp_single_u16_x4(svuint16x4_t op1, svuint16_t op2, svuint16_t op3) __arm_streaming { return SVE_ACLE_FUNC(svclamp, _single_u16_x4, , )(op1, op2, op3); @@ -475,35 +231,13 @@ svuint16x4_t test_svclamp_single_u16_x4(svuint16x4_t op1, svuint16_t op2, svuint // CHECK-LABEL: @test_svclamp_single_u32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uclamp.single.x4.nxv4i32( [[OP1_COERCE0:%.*]], [[OP1_COERCE1:%.*]], [[OP1_COERCE2:%.*]], [[OP1_COERCE3:%.*]], [[OP2:%.*]], [[OP3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z26test_svclamp_single_u32_x412svuint32x4_tu12__SVUint32_tS0_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uclamp.single.x4.nxv4i32( [[OP1_COERCE0:%.*]], [[OP1_COERCE1:%.*]], [[OP1_COERCE2:%.*]], [[OP1_COERCE3:%.*]], [[OP2:%.*]], [[OP3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 
-// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint32x4_t test_svclamp_single_u32_x4(svuint32x4_t op1, svuint32_t op2, svuint32_t op3) __arm_streaming { return SVE_ACLE_FUNC(svclamp, _single_u32_x4, , )(op1, op2, op3); @@ -511,35 +245,13 @@ svuint32x4_t test_svclamp_single_u32_x4(svuint32x4_t op1, svuint32_t op2, svuint // CHECK-LABEL: @test_svclamp_single_u64_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uclamp.single.x4.nxv2i64( [[OP1_COERCE0:%.*]], [[OP1_COERCE1:%.*]], [[OP1_COERCE2:%.*]], [[OP1_COERCE3:%.*]], [[OP2:%.*]], [[OP3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z26test_svclamp_single_u64_x412svuint64x4_tu12__SVUint64_tS0_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uclamp.single.x4.nxv2i64( [[OP1_COERCE0:%.*]], [[OP1_COERCE1:%.*]], [[OP1_COERCE2:%.*]], [[OP1_COERCE3:%.*]], [[OP2:%.*]], [[OP3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint64x4_t test_svclamp_single_u64_x4(svuint64x4_t op1, svuint64_t op2, svuint64_t op3) __arm_streaming { return SVE_ACLE_FUNC(svclamp, _single_u64_x4, , )(op1, op2, op3); @@ -552,27 +264,13 @@ svuint64x4_t test_svclamp_single_u64_x4(svuint64x4_t op1, svuint64_t op2, svuint // CHECK-LABEL: @test_svclamp_single_f16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, 
align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fclamp.single.x2.nxv8f16( [[OP1_COERCE0:%.*]], [[OP1_COERCE1:%.*]], [[OP2:%.*]], [[OP3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z26test_svclamp_single_f16_x213svfloat16x2_tu13__SVFloat16_tS0_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fclamp.single.x2.nxv8f16( [[OP1_COERCE0:%.*]], [[OP1_COERCE1:%.*]], [[OP2:%.*]], [[OP3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat16x2_t test_svclamp_single_f16_x2(svfloat16x2_t op1, svfloat16_t op2, svfloat16_t op3) __arm_streaming { return SVE_ACLE_FUNC(svclamp, _single_f16_x2, , )(op1, op2, op3); @@ -580,27 +278,13 @@ svfloat16x2_t test_svclamp_single_f16_x2(svfloat16x2_t op1, svfloat16_t op2, svf // CHECK-LABEL: @test_svclamp_single_f32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fclamp.single.x2.nxv4f32( [[OP1_COERCE0:%.*]], [[OP1_COERCE1:%.*]], [[OP2:%.*]], [[OP3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z26test_svclamp_single_f32_x213svfloat32x2_tu13__SVFloat32_tS0_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fclamp.single.x2.nxv4f32( [[OP1_COERCE0:%.*]], [[OP1_COERCE1:%.*]], [[OP2:%.*]], [[OP3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// 
CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat32x2_t test_svclamp_single_f32_x2(svfloat32x2_t op1, svfloat32_t op2, svfloat32_t op3) __arm_streaming { return SVE_ACLE_FUNC(svclamp, _single_f32_x2, , )(op1, op2, op3); @@ -609,27 +293,13 @@ svfloat32x2_t test_svclamp_single_f32_x2(svfloat32x2_t op1, svfloat32_t op2, svf // CHECK-LABEL: @test_svclamp_single_f64_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fclamp.single.x2.nxv2f64( [[OP1_COERCE0:%.*]], [[OP1_COERCE1:%.*]], [[OP2:%.*]], [[OP3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z26test_svclamp_single_f64_x213svfloat64x2_tu13__SVFloat64_tS0_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fclamp.single.x2.nxv2f64( [[OP1_COERCE0:%.*]], [[OP1_COERCE1:%.*]], [[OP2:%.*]], [[OP3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat64x2_t test_svclamp_single_f64_x2(svfloat64x2_t op1, svfloat64_t op2, svfloat64_t op3) __arm_streaming { return SVE_ACLE_FUNC(svclamp, _single_f64_x2, , )(op1, op2, op3); @@ -640,35 +310,13 @@ svfloat64x2_t test_svclamp_single_f64_x2(svfloat64x2_t op1, svfloat64_t op2, svf // CHECK-LABEL: @test_svclamp_single_f16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fclamp.single.x4.nxv8f16( [[OP1_COERCE0:%.*]], [[OP1_COERCE1:%.*]], [[OP1_COERCE2:%.*]], [[OP1_COERCE3:%.*]], [[OP2:%.*]], [[OP3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // 
CPP-CHECK-LABEL: @_Z26test_svclamp_single_f16_x413svfloat16x4_tu13__SVFloat16_tS0_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fclamp.single.x4.nxv8f16( [[OP1_COERCE0:%.*]], [[OP1_COERCE1:%.*]], [[OP1_COERCE2:%.*]], [[OP1_COERCE3:%.*]], [[OP2:%.*]], [[OP3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat16x4_t test_svclamp_single_f16_x4(svfloat16x4_t op1, svfloat16_t op2, svfloat16_t op3) __arm_streaming { return SVE_ACLE_FUNC(svclamp, _single_f16_x4, , )(op1, op2, op3); @@ -676,35 +324,13 @@ svfloat16x4_t test_svclamp_single_f16_x4(svfloat16x4_t op1, svfloat16_t op2, svf // CHECK-LABEL: @test_svclamp_single_f32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fclamp.single.x4.nxv4f32( [[OP1_COERCE0:%.*]], [[OP1_COERCE1:%.*]], [[OP1_COERCE2:%.*]], [[OP1_COERCE3:%.*]], [[OP2:%.*]], [[OP3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z26test_svclamp_single_f32_x413svfloat32x4_tu13__SVFloat32_tS0_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fclamp.single.x4.nxv4f32( [[OP1_COERCE0:%.*]], [[OP1_COERCE1:%.*]], [[OP1_COERCE2:%.*]], [[OP1_COERCE3:%.*]], [[OP2:%.*]], [[OP3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( 
[[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat32x4_t test_svclamp_single_f32_x4(svfloat32x4_t op1, svfloat32_t op2, svfloat32_t op3) __arm_streaming { return SVE_ACLE_FUNC(svclamp, _single_f32_x4, , )(op1, op2, op3); @@ -712,35 +338,13 @@ svfloat32x4_t test_svclamp_single_f32_x4(svfloat32x4_t op1, svfloat32_t op2, svf // CHECK-LABEL: @test_svclamp_single_f64_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fclamp.single.x4.nxv2f64( [[OP1_COERCE0:%.*]], [[OP1_COERCE1:%.*]], [[OP1_COERCE2:%.*]], [[OP1_COERCE3:%.*]], [[OP2:%.*]], [[OP3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z26test_svclamp_single_f64_x413svfloat64x4_tu13__SVFloat64_tS0_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fclamp.single.x4.nxv2f64( [[OP1_COERCE0:%.*]], [[OP1_COERCE1:%.*]], [[OP1_COERCE2:%.*]], [[OP1_COERCE3:%.*]], [[OP2:%.*]], [[OP3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat64x4_t test_svclamp_single_f64_x4(svfloat64x4_t op1, svfloat64_t op2, svfloat64_t op3) __arm_streaming { 
return SVE_ACLE_FUNC(svclamp, _single_f64_x4, , )(op1, op2, op3); @@ -748,27 +352,13 @@ svfloat64x4_t test_svclamp_single_f64_x4(svfloat64x4_t op1, svfloat64_t op2, svf // CHECK-LABEL: @test_svclamp_single_bf16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.bfclamp.single.x2.nxv8bf16( [[OP1_COERCE0:%.*]], [[OP1_COERCE1:%.*]], [[OP2:%.*]], [[OP3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z27test_svclamp_single_bf16_x214svbfloat16x2_tu14__SVBfloat16_tS0_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.bfclamp.single.x2.nxv8bf16( [[OP1_COERCE0:%.*]], [[OP1_COERCE1:%.*]], [[OP2:%.*]], [[OP3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svbfloat16x2_t test_svclamp_single_bf16_x2(svbfloat16x2_t op1, svbfloat16_t op2, svbfloat16_t op3) __arm_streaming { return SVE_ACLE_FUNC(svclamp, _single_bf16_x2, , )(op1, op2, op3); @@ -776,35 +366,13 @@ svbfloat16x2_t test_svclamp_single_bf16_x2(svbfloat16x2_t op1, svbfloat16_t op2, // CHECK-LABEL: @test_svclamp_single_bf16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.bfclamp.single.x4.nxv8bf16( [[OP1_COERCE0:%.*]], [[OP1_COERCE1:%.*]], [[OP1_COERCE2:%.*]], [[OP1_COERCE3:%.*]], [[OP2:%.*]], [[OP3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z27test_svclamp_single_bf16_x414svbfloat16x4_tu14__SVBfloat16_tS0_( // CPP-CHECK-NEXT: entry: -// 
CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.bfclamp.single.x4.nxv8bf16( [[OP1_COERCE0:%.*]], [[OP1_COERCE1:%.*]], [[OP1_COERCE2:%.*]], [[OP1_COERCE3:%.*]], [[OP2:%.*]], [[OP3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svbfloat16x4_t test_svclamp_single_bf16_x4(svbfloat16x4_t op1, svbfloat16_t op2, svbfloat16_t op3) __arm_streaming { return SVE_ACLE_FUNC(svclamp, _single_bf16_x4, , )(op1, op2, op3); diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_cvt.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_cvt.c index 2d61670fd6049..2851ea9ccd22c 100644 --- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_cvt.c +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_cvt.c @@ -49,27 +49,13 @@ svbfloat16_t test_cvt_bf16_x2(svfloat32x2_t zn) __arm_streaming { // x2 // CHECK-LABEL: @test_svcvt_f32_u32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.ucvtf.x2.nxv4f32.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z21test_svcvt_f32_u32_x212svuint32x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.ucvtf.x2.nxv4f32.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat32x2_t test_svcvt_f32_u32_x2(svuint32x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svcvt_f32,_u32_x2,,)(zn); @@ -77,27 
+63,13 @@ svfloat32x2_t test_svcvt_f32_u32_x2(svuint32x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svcvt_f32_s32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.scvtf.x2.nxv4f32.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z21test_svcvt_f32_s32_x211svint32x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.scvtf.x2.nxv4f32.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat32x2_t test_svcvt_f32_s32_x2(svint32x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svcvt_f32,_s32_x2,,)(zn); @@ -105,27 +77,13 @@ svfloat32x2_t test_svcvt_f32_s32_x2(svint32x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svcvt_u32_f32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fcvtzu.x2.nxv4i32.nxv4f32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z21test_svcvt_u32_f32_x213svfloat32x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fcvtzu.x2.nxv4i32.nxv4f32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint32x2_t 
test_svcvt_u32_f32_x2(svfloat32x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svcvt_u32,_f32_x2,,)(zn); @@ -133,27 +91,13 @@ svuint32x2_t test_svcvt_u32_f32_x2(svfloat32x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svcvt_s32_f32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fcvtzs.x2.nxv4i32.nxv4f32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z21test_svcvt_s32_f32_x213svfloat32x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fcvtzs.x2.nxv4i32.nxv4f32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint32x2_t test_svcvt_s32_f32_x2(svfloat32x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svcvt_s32,_f32_x2,,)(zn); @@ -162,35 +106,13 @@ svint32x2_t test_svcvt_s32_f32_x2(svfloat32x2_t zn) __arm_streaming { // x4 // CHECK-LABEL: @test_svcvt_f32_u32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.ucvtf.x4.nxv4f32.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z21test_svcvt_f32_u32_x412svuint32x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.ucvtf.x4.nxv4f32.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], 
[[ZN_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat32x4_t test_svcvt_f32_u32_x4(svuint32x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svcvt_f32,_u32_x4,,)(zn); @@ -198,35 +120,13 @@ svfloat32x4_t test_svcvt_f32_u32_x4(svuint32x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svcvt_f32_s32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.scvtf.x4.nxv4f32.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z21test_svcvt_f32_s32_x411svint32x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.scvtf.x4.nxv4f32.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 
-// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat32x4_t test_svcvt_f32_s32_x4(svint32x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svcvt_f32,_s32_x4,,)(zn); @@ -234,35 +134,13 @@ svfloat32x4_t test_svcvt_f32_s32_x4(svint32x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svcvt_u32_f32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fcvtzu.x4.nxv4i32.nxv4f32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z21test_svcvt_u32_f32_x413svfloat32x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fcvtzu.x4.nxv4i32.nxv4f32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint32x4_t test_svcvt_u32_f32_x4(svfloat32x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svcvt_u32,_f32_x4,,)(zn); @@ -270,35 +148,13 @@ svuint32x4_t test_svcvt_u32_f32_x4(svfloat32x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svcvt_s32_f32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fcvtzs.x4.nxv4i32.nxv4f32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call 
@llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z21test_svcvt_s32_f32_x413svfloat32x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fcvtzs.x4.nxv4i32.nxv4f32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint32x4_t test_svcvt_s32_f32_x4(svfloat32x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svcvt_s32,_f32_x4,,)(zn); @@ -432,27 +288,13 @@ svuint16_t test_qcvt_u16_s64_x4(svint64x4_t zn) __arm_streaming { // CHECK-LABEL: @test_cvt_f32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fcvt.widen.x2.nxv4f32( [[ZN:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z15test_cvt_f32_x2u13__SVFloat16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fcvt.widen.x2.nxv4f32( [[ZN:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr 
[[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // __attribute__((target("sme-f16f16"))) svfloat32x2_t test_cvt_f32_x2(svfloat16_t zn) __arm_streaming { return SVE_ACLE_FUNC(svcvt_f32,_f16_x2,,)(zn); diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_cvtl.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_cvtl.c index fc5c0376e925e..5189ab4af8327 100644 --- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_cvtl.c +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_cvtl.c @@ -19,27 +19,13 @@ // CHECK-LABEL: @test_cvtl_f32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fcvtl.widen.x2.nxv4f32( [[ZN:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z16test_cvtl_f32_x2u13__SVFloat16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fcvtl.widen.x2.nxv4f32( [[ZN:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat32x2_t test_cvtl_f32_x2(svfloat16_t zn) __arm_streaming { return SVE_ACLE_FUNC(svcvtl_f32,_f16_x2,,)(zn); diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_faminmax.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_faminmax.c index a1540bba2a8a9..d4d423f982e84 100644 --- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_faminmax.c +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_faminmax.c @@ -19,27 +19,13 @@ // CHECK-LABEL: @test_svamax_f16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.famax.x2.nxv8f16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svamax_f16_x213svfloat16x2_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // 
CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.famax.x2.nxv8f16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat16x2_t test_svamax_f16_x2(svfloat16x2_t zdn, svfloat16x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svamax,_f16_x2)(zdn, zm); @@ -47,27 +33,13 @@ svfloat16x2_t test_svamax_f16_x2(svfloat16x2_t zdn, svfloat16x2_t zm) __arm_stre // CHECK-LABEL: @test_svamax_f32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.famax.x2.nxv4f32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svamax_f32_x213svfloat32x2_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.famax.x2.nxv4f32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat32x2_t test_svamax_f32_x2(svfloat32x2_t zdn, svfloat32x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svamax,_f32_x2)(zdn, zm); @@ -75,27 +47,13 @@ svfloat32x2_t test_svamax_f32_x2(svfloat32x2_t zdn, svfloat32x2_t zm) __arm_stre // CHECK-LABEL: @test_svamax_f64_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.famax.x2.nxv2f64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 
16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svamax_f64_x213svfloat64x2_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.famax.x2.nxv2f64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat64x2_t test_svamax_f64_x2(svfloat64x2_t zdn, svfloat64x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svamax,_f64_x2)(zdn, zm); @@ -103,27 +61,13 @@ svfloat64x2_t test_svamax_f64_x2(svfloat64x2_t zdn, svfloat64x2_t zm) __arm_stre // CHECK-LABEL: @test_svamin_f16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.famin.x2.nxv8f16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svamin_f16_x213svfloat16x2_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.famin.x2.nxv8f16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat16x2_t test_svamin_f16_x2(svfloat16x2_t zdn, svfloat16x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svamin,_f16_x2)(zdn, zm); @@ -131,27 +75,13 @@ svfloat16x2_t test_svamin_f16_x2(svfloat16x2_t zdn, svfloat16x2_t zm) __arm_stre // CHECK-LABEL: @test_svamin_f32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.famin.x2.nxv4f32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: 
[[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svamin_f32_x213svfloat32x2_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.famin.x2.nxv4f32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat32x2_t test_svamin_f32_x2(svfloat32x2_t zdn, svfloat32x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svamin,_f32_x2)(zdn, zm); @@ -159,27 +89,13 @@ svfloat32x2_t test_svamin_f32_x2(svfloat32x2_t zdn, svfloat32x2_t zm) __arm_stre // CHECK-LABEL: @test_svamin_f64_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.famin.x2.nxv2f64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svamin_f64_x213svfloat64x2_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.famin.x2.nxv2f64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat64x2_t test_svamin_f64_x2(svfloat64x2_t zdn, svfloat64x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svamin,_f64_x2)(zdn, zm); @@ -189,35 +105,13 @@ svfloat64x2_t test_svamin_f64_x2(svfloat64x2_t zdn, svfloat64x2_t zm) __arm_stre // CHECK-LABEL: @test_svamax_f16_x4( // CHECK-NEXT: entry: -// 
CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.famax.x4.nxv8f16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svamax_f16_x413svfloat16x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.famax.x4.nxv8f16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat16x4_t test_svamax_f16_x4(svfloat16x4_t zdn, svfloat16x4_t zm) __arm_streaming { return SVE_ACLE_FUNC(svamax,_f16_x4)(zdn, zm); @@ -225,35 +119,13 @@ svfloat16x4_t test_svamax_f16_x4(svfloat16x4_t zdn, svfloat16x4_t zm) __arm_stre // CHECK-LABEL: @test_svamax_f32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.famax.x4.nxv4f32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , 
, } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svamax_f32_x413svfloat32x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.famax.x4.nxv4f32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat32x4_t test_svamax_f32_x4(svfloat32x4_t zdn, svfloat32x4_t zm) __arm_streaming { return SVE_ACLE_FUNC(svamax,_f32_x4)(zdn, zm); @@ -261,35 +133,13 @@ svfloat32x4_t test_svamax_f32_x4(svfloat32x4_t zdn, svfloat32x4_t zm) __arm_stre // CHECK-LABEL: @test_svamax_f64_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.famax.x4.nxv2f64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svamax_f64_x413svfloat64x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , 
, , } @llvm.aarch64.sme.famax.x4.nxv2f64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat64x4_t test_svamax_f64_x4(svfloat64x4_t zdn, svfloat64x4_t zm) __arm_streaming { return SVE_ACLE_FUNC(svamax,_f64_x4)(zdn, zm); @@ -297,35 +147,13 @@ svfloat64x4_t test_svamax_f64_x4(svfloat64x4_t zdn, svfloat64x4_t zm) __arm_stre // CHECK-LABEL: @test_svamin_f16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.famin.x4.nxv8f16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svamin_f16_x413svfloat16x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.famin.x4.nxv8f16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call 
@llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat16x4_t test_svamin_f16_x4(svfloat16x4_t zdn, svfloat16x4_t zm) __arm_streaming { return SVE_ACLE_FUNC(svamin,_f16_x4)(zdn, zm); @@ -333,35 +161,13 @@ svfloat16x4_t test_svamin_f16_x4(svfloat16x4_t zdn, svfloat16x4_t zm) __arm_stre // CHECK-LABEL: @test_svamin_f32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.famin.x4.nxv4f32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svamin_f32_x413svfloat32x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.famin.x4.nxv4f32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat32x4_t test_svamin_f32_x4(svfloat32x4_t zdn, svfloat32x4_t zm) __arm_streaming { return SVE_ACLE_FUNC(svamin,_f32_x4)(zdn, zm); @@ -369,35 +175,13 @@ svfloat32x4_t test_svamin_f32_x4(svfloat32x4_t zdn, svfloat32x4_t zm) 
__arm_stre // CHECK-LABEL: @test_svamin_f64_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.famin.x4.nxv2f64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svamin_f64_x413svfloat64x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.famin.x4.nxv2f64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat64x4_t test_svamin_f64_x4(svfloat64x4_t zdn, svfloat64x4_t zm) __arm_streaming { return SVE_ACLE_FUNC(svamin,_f64_x4)(zdn, zm); diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_frint.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_frint.c index abdb5a46d5453..8ab450587fc70 100644 --- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_frint.c +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_frint.c @@ -21,27 +21,13 @@ // CHECK-LABEL: @test_svfrinta_f32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.frinta.x2.nxv4f32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 
-// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z20test_svfrinta_f32_x213svfloat32x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.frinta.x2.nxv4f32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat32x2_t test_svfrinta_f32_x2(svfloat32x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svrinta,_f32_x2)(zn); @@ -49,35 +35,13 @@ svfloat32x2_t test_svfrinta_f32_x2(svfloat32x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svfrinta_f32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.frinta.x4.nxv4f32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z20test_svfrinta_f32_x413svfloat32x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.frinta.x4.nxv4f32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], 
[[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat32x4_t test_svfrinta_f32_x4(svfloat32x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svrinta,_f32_x4)(zn); @@ -87,27 +51,13 @@ svfloat32x4_t test_svfrinta_f32_x4(svfloat32x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svfrintam_f32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.frintm.x2.nxv4f32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z21test_svfrintam_f32_x213svfloat32x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.frintm.x2.nxv4f32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat32x2_t test_svfrintam_f32_x2(svfloat32x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svrintm,_f32_x2)(zn); @@ -115,35 +65,13 @@ svfloat32x2_t test_svfrintam_f32_x2(svfloat32x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svfrintm_f32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.frintm.x4.nxv4f32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z20test_svfrintm_f32_x413svfloat32x4_t( // CPP-CHECK-NEXT: entry: -// 
CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.frintm.x4.nxv4f32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat32x4_t test_svfrintm_f32_x4(svfloat32x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svrintm,_f32_x4)(zn); @@ -153,27 +81,13 @@ svfloat32x4_t test_svfrintm_f32_x4(svfloat32x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svfrintn_f32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.frintn.x2.nxv4f32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z20test_svfrintn_f32_x213svfloat32x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.frintn.x2.nxv4f32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat32x2_t test_svfrintn_f32_x2(svfloat32x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svrintn,_f32_x2)(zn); @@ -181,35 +95,13 @@ svfloat32x2_t test_svfrintn_f32_x2(svfloat32x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svfrintn_f32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.frintn.x4.nxv4f32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// 
CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z20test_svfrintn_f32_x413svfloat32x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.frintn.x4.nxv4f32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat32x4_t test_svfrintn_f32_x4(svfloat32x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svrintn,_f32_x4)(zn); @@ -219,27 +111,13 @@ svfloat32x4_t test_svfrintn_f32_x4(svfloat32x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svfrintp_f32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.frintp.x2.nxv4f32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z20test_svfrintp_f32_x213svfloat32x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.frintp.x2.nxv4f32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = 
extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat32x2_t test_svfrintp_f32_x2(svfloat32x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svrintp,_f32_x2)(zn); @@ -247,35 +125,13 @@ svfloat32x2_t test_svfrintp_f32_x2(svfloat32x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svfrintp_f32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.frintp.x4.nxv4f32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z20test_svfrintp_f32_x413svfloat32x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.frintp.x4.nxv4f32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat32x4_t test_svfrintp_f32_x4(svfloat32x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svrintp,_f32_x4)(zn); diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti2_lane_zt_x2.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti2_lane_zt_x2.c index 6dd55663d7d34..3b17c6d9edb19 100644 --- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti2_lane_zt_x2.c +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti2_lane_zt_x2.c @@ -10,27 +10,13 @@ // CHECK-LABEL: @test_svluti2_lane_zt_u8( // 
CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.luti2.lane.zt.x2.nxv16i8(i32 0, [[ZN:%.*]], i32 7) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z23test_svluti2_lane_zt_u8u11__SVUint8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.luti2.lane.zt.x2.nxv16i8(i32 0, [[ZN:%.*]], i32 7) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint8x2_t test_svluti2_lane_zt_u8(svuint8_t zn) __arm_streaming __arm_in("zt0") { return svluti2_lane_zt_u8_x2(0, zn, 7); @@ -39,27 +25,13 @@ svuint8x2_t test_svluti2_lane_zt_u8(svuint8_t zn) __arm_streaming __arm_in("zt0" // CHECK-LABEL: @test_svluti2_lane_zt_s8( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.luti2.lane.zt.x2.nxv16i8(i32 0, [[ZN:%.*]], i32 7) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z23test_svluti2_lane_zt_s8u11__SVUint8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.luti2.lane.zt.x2.nxv16i8(i32 0, [[ZN:%.*]], i32 7) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint8x2_t test_svluti2_lane_zt_s8(svuint8_t zn) __arm_streaming __arm_in("zt0") { return svluti2_lane_zt_s8_x2(0, zn, 7); @@ -67,27 +39,13 @@ svint8x2_t 
test_svluti2_lane_zt_s8(svuint8_t zn) __arm_streaming __arm_in("zt0") // CHECK-LABEL: @test_svluti2_lane_zt_u16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.luti2.lane.zt.x2.nxv8i16(i32 0, [[ZN:%.*]], i32 7) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svluti2_lane_zt_u16u11__SVUint8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.luti2.lane.zt.x2.nxv8i16(i32 0, [[ZN:%.*]], i32 7) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint16x2_t test_svluti2_lane_zt_u16(svuint8_t zn) __arm_streaming __arm_in("zt0") { return svluti2_lane_zt_u16_x2(0, zn, 7); @@ -96,27 +54,13 @@ svuint16x2_t test_svluti2_lane_zt_u16(svuint8_t zn) __arm_streaming __arm_in("zt // CHECK-LABEL: @test_svluti2_lane_zt_s16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.luti2.lane.zt.x2.nxv8i16(i32 0, [[ZN:%.*]], i32 7) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svluti2_lane_zt_s16u11__SVUint8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.luti2.lane.zt.x2.nxv8i16(i32 0, [[ZN:%.*]], i32 7) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint16x2_t test_svluti2_lane_zt_s16(svuint8_t zn) 
__arm_streaming __arm_in("zt0") { return svluti2_lane_zt_s16_x2(0, zn, 7); @@ -124,27 +68,13 @@ svint16x2_t test_svluti2_lane_zt_s16(svuint8_t zn) __arm_streaming __arm_in("zt0 // CHECK-LABEL: @test_svluti2_lane_zt_f16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.luti2.lane.zt.x2.nxv8f16(i32 0, [[ZN:%.*]], i32 7) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svluti2_lane_zt_f16u11__SVUint8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.luti2.lane.zt.x2.nxv8f16(i32 0, [[ZN:%.*]], i32 7) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat16x2_t test_svluti2_lane_zt_f16(svuint8_t zn) __arm_streaming __arm_in("zt0") { return svluti2_lane_zt_f16_x2(0, zn, 7); @@ -152,27 +82,13 @@ svfloat16x2_t test_svluti2_lane_zt_f16(svuint8_t zn) __arm_streaming __arm_in("z // CHECK-LABEL: @test_svluti2_lane_zt_bf16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.luti2.lane.zt.x2.nxv8bf16(i32 0, [[ZN:%.*]], i32 7) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z25test_svluti2_lane_zt_bf16u11__SVUint8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.luti2.lane.zt.x2.nxv8bf16(i32 0, [[ZN:%.*]], i32 7) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// 
CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svbfloat16x2_t test_svluti2_lane_zt_bf16(svuint8_t zn) __arm_streaming __arm_in("zt0") { return svluti2_lane_zt_bf16_x2(0, zn, 7); @@ -180,27 +96,13 @@ svbfloat16x2_t test_svluti2_lane_zt_bf16(svuint8_t zn) __arm_streaming __arm_in( // CHECK-LABEL: @test_svluti2_lane_zt_u32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.luti2.lane.zt.x2.nxv4i32(i32 0, [[ZN:%.*]], i32 7) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svluti2_lane_zt_u32u11__SVUint8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.luti2.lane.zt.x2.nxv4i32(i32 0, [[ZN:%.*]], i32 7) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint32x2_t test_svluti2_lane_zt_u32(svuint8_t zn) __arm_streaming __arm_in("zt0") { return svluti2_lane_zt_u32_x2(0, zn, 7); @@ -208,27 +110,13 @@ svuint32x2_t test_svluti2_lane_zt_u32(svuint8_t zn) __arm_streaming __arm_in("zt // CHECK-LABEL: @test_svluti2_lane_zt_s32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.luti2.lane.zt.x2.nxv4i32(i32 0, [[ZN:%.*]], i32 7) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svluti2_lane_zt_s32u11__SVUint8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.luti2.lane.zt.x2.nxv4i32(i32 0, [[ZN:%.*]], i32 7) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store 
[[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint32x2_t test_svluti2_lane_zt_s32(svuint8_t zn) __arm_streaming __arm_in("zt0") { return svluti2_lane_zt_s32_x2(0, zn, 7); @@ -236,27 +124,13 @@ svint32x2_t test_svluti2_lane_zt_s32(svuint8_t zn) __arm_streaming __arm_in("zt0 // CHECK-LABEL: @test_svluti2_lane_zt_f32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.luti2.lane.zt.x2.nxv4f32(i32 0, [[ZN:%.*]], i32 7) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svluti2_lane_zt_f32u11__SVUint8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.luti2.lane.zt.x2.nxv4f32(i32 0, [[ZN:%.*]], i32 7) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat32x2_t test_svluti2_lane_zt_f32(svuint8_t zn) __arm_streaming __arm_in("zt0") { return svluti2_lane_zt_f32_x2(0, zn, 7); diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti2_lane_zt_x4.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti2_lane_zt_x4.c index 8650ec7f62dd8..38059019737f8 100644 --- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti2_lane_zt_x4.c +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti2_lane_zt_x4.c @@ -10,35 +10,13 @@ // CHECK-LABEL: @test_svluti2_lane_zt_u8( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.luti2.lane.zt.x4.nxv16i8(i32 0, [[ZN:%.*]], i32 3) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, 
ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z23test_svluti2_lane_zt_u8u11__SVUint8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.luti2.lane.zt.x4.nxv16i8(i32 0, [[ZN:%.*]], i32 3) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint8x4_t test_svluti2_lane_zt_u8(svuint8_t zn) __arm_streaming __arm_in("zt0") { return svluti2_lane_zt_u8_x4(0, zn, 3); @@ -47,35 +25,13 @@ svuint8x4_t test_svluti2_lane_zt_u8(svuint8_t zn) __arm_streaming __arm_in("zt0" // CHECK-LABEL: @test_svluti2_lane_zt_s8( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.luti2.lane.zt.x4.nxv16i8(i32 0, [[ZN:%.*]], i32 3) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z23test_svluti2_lane_zt_s8u11__SVUint8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.luti2.lane.zt.x4.nxv16i8(i32 0, [[ZN:%.*]], i32 3) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , 
, , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint8x4_t test_svluti2_lane_zt_s8(svuint8_t zn) __arm_streaming __arm_in("zt0") { return svluti2_lane_zt_s8_x4(0, zn, 3); @@ -83,35 +39,13 @@ svint8x4_t test_svluti2_lane_zt_s8(svuint8_t zn) __arm_streaming __arm_in("zt0") // CHECK-LABEL: @test_svluti2_lane_zt_u16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.luti2.lane.zt.x4.nxv8i16(i32 0, [[ZN:%.*]], i32 3) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svluti2_lane_zt_u16u11__SVUint8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.luti2.lane.zt.x4.nxv8i16(i32 0, [[ZN:%.*]], i32 3) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint16x4_t test_svluti2_lane_zt_u16(svuint8_t zn) __arm_streaming __arm_in("zt0") { return svluti2_lane_zt_u16_x4(0, zn, 3); @@ -119,35 +53,13 @@ svuint16x4_t test_svluti2_lane_zt_u16(svuint8_t zn) __arm_streaming __arm_in("zt // CHECK-LABEL: @test_svluti2_lane_zt_s16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.luti2.lane.zt.x4.nxv8i16(i32 0, [[ZN:%.*]], i32 3) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call 
@llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svluti2_lane_zt_s16u11__SVUint8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.luti2.lane.zt.x4.nxv8i16(i32 0, [[ZN:%.*]], i32 3) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint16x4_t test_svluti2_lane_zt_s16(svuint8_t zn) __arm_streaming __arm_in("zt0") { return svluti2_lane_zt_s16_x4(0, zn, 3); @@ -155,35 +67,13 @@ svint16x4_t test_svluti2_lane_zt_s16(svuint8_t zn) __arm_streaming __arm_in("zt0 // CHECK-LABEL: @test_svluti2_lane_zt_f16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.luti2.lane.zt.x4.nxv8f16(i32 0, [[ZN:%.*]], i32 3) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svluti2_lane_zt_f16u11__SVUint8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // 
CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.luti2.lane.zt.x4.nxv8f16(i32 0, [[ZN:%.*]], i32 3) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat16x4_t test_svluti2_lane_zt_f16(svuint8_t zn) __arm_streaming __arm_in("zt0") { return svluti2_lane_zt_f16_x4(0, zn, 3); @@ -191,35 +81,13 @@ svfloat16x4_t test_svluti2_lane_zt_f16(svuint8_t zn) __arm_streaming __arm_in("z // CHECK-LABEL: @test_svluti2_lane_zt_bf16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.luti2.lane.zt.x4.nxv8bf16(i32 0, [[ZN:%.*]], i32 3) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z25test_svluti2_lane_zt_bf16u11__SVUint8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.luti2.lane.zt.x4.nxv8bf16(i32 0, [[ZN:%.*]], i32 3) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = 
load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svbfloat16x4_t test_svluti2_lane_zt_bf16(svuint8_t zn) __arm_streaming __arm_in("zt0") { return svluti2_lane_zt_bf16_x4(0, zn, 3); @@ -227,35 +95,13 @@ svbfloat16x4_t test_svluti2_lane_zt_bf16(svuint8_t zn) __arm_streaming __arm_in( // CHECK-LABEL: @test_svluti2_lane_zt_u32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.luti2.lane.zt.x4.nxv4i32(i32 0, [[ZN:%.*]], i32 3) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svluti2_lane_zt_u32u11__SVUint8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.luti2.lane.zt.x4.nxv4i32(i32 0, [[ZN:%.*]], i32 3) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint32x4_t test_svluti2_lane_zt_u32(svuint8_t zn) __arm_streaming __arm_in("zt0") { return svluti2_lane_zt_u32_x4(0, zn, 3); @@ -263,35 +109,13 @@ svuint32x4_t test_svluti2_lane_zt_u32(svuint8_t zn) __arm_streaming __arm_in("zt // CHECK-LABEL: @test_svluti2_lane_zt_s32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.luti2.lane.zt.x4.nxv4i32(i32 0, [[ZN:%.*]], i32 3) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// 
CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svluti2_lane_zt_s32u11__SVUint8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.luti2.lane.zt.x4.nxv4i32(i32 0, [[ZN:%.*]], i32 3) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint32x4_t test_svluti2_lane_zt_s32(svuint8_t zn) __arm_streaming __arm_in("zt0") { return svluti2_lane_zt_s32_x4(0, zn, 3); @@ -299,35 +123,13 @@ svint32x4_t test_svluti2_lane_zt_s32(svuint8_t zn) __arm_streaming __arm_in("zt0 // CHECK-LABEL: @test_svluti2_lane_zt_f32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.luti2.lane.zt.x4.nxv4f32(i32 0, [[ZN:%.*]], i32 3) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svluti2_lane_zt_f32u11__SVUint8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.luti2.lane.zt.x4.nxv4f32(i32 0, [[ZN:%.*]], i32 3) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call 
@llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat32x4_t test_svluti2_lane_zt_f32(svuint8_t zn) __arm_streaming __arm_in("zt0") { return svluti2_lane_zt_f32_x4(0, zn, 3); diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti4_lane_zt_x2.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti4_lane_zt_x2.c index f4f11c9fc5b14..db615b3cd1c24 100644 --- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti4_lane_zt_x2.c +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti4_lane_zt_x2.c @@ -10,27 +10,13 @@ // CHECK-LABEL: @test_svluti4_lane_zt_u8( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.luti4.lane.zt.x2.nxv16i8(i32 0, [[ZN:%.*]], i32 3) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z23test_svluti4_lane_zt_u8u11__SVUint8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.luti4.lane.zt.x2.nxv16i8(i32 0, [[ZN:%.*]], i32 3) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint8x2_t test_svluti4_lane_zt_u8(svuint8_t zn) __arm_streaming __arm_in("zt0") { return svluti4_lane_zt_u8_x2(0, zn, 3); @@ -39,27 +25,13 @@ svuint8x2_t test_svluti4_lane_zt_u8(svuint8_t zn) __arm_streaming __arm_in("zt0" // CHECK-LABEL: @test_svluti4_lane_zt_s8( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.luti4.lane.zt.x2.nxv16i8(i32 0, [[ZN:%.*]], i32 3) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 
0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z23test_svluti4_lane_zt_s8u11__SVUint8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.luti4.lane.zt.x2.nxv16i8(i32 0, [[ZN:%.*]], i32 3) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint8x2_t test_svluti4_lane_zt_s8(svuint8_t zn) __arm_streaming __arm_in("zt0") { return svluti4_lane_zt_s8_x2(0, zn, 3); @@ -67,27 +39,13 @@ svint8x2_t test_svluti4_lane_zt_s8(svuint8_t zn) __arm_streaming __arm_in("zt0") // CHECK-LABEL: @test_svluti4_lane_zt_u16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.luti4.lane.zt.x2.nxv8i16(i32 0, [[ZN:%.*]], i32 3) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svluti4_lane_zt_u16u11__SVUint8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.luti4.lane.zt.x2.nxv8i16(i32 0, [[ZN:%.*]], i32 3) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint16x2_t test_svluti4_lane_zt_u16(svuint8_t zn) __arm_streaming __arm_in("zt0") { return svluti4_lane_zt_u16_x2(0, zn, 3); @@ -96,27 +54,13 @@ svuint16x2_t test_svluti4_lane_zt_u16(svuint8_t zn) __arm_streaming __arm_in("zt // CHECK-LABEL: @test_svluti4_lane_zt_s16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.luti4.lane.zt.x2.nxv8i16(i32 0, [[ZN:%.*]], i32 3) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
-// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8)
-// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
-// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
-// CHECK-NEXT: ret { , } [[TMP5]]
+// CHECK-NEXT: ret { , } [[TMP0]]
//
// CPP-CHECK-LABEL: @_Z24test_svluti4_lane_zt_s16u11__SVUint8_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.luti4.lane.zt.x2.nxv8i16(i32 0, [[ZN:%.*]], i32 3)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8)
-// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: ret { , } [[TMP5]]
+// CPP-CHECK-NEXT: ret { , } [[TMP0]]
//
svint16x2_t test_svluti4_lane_zt_s16(svuint8_t zn) __arm_streaming __arm_in("zt0") {
  return svluti4_lane_zt_s16_x2(0, zn, 3);
@@ -124,27 +68,13 @@ svint16x2_t test_svluti4_lane_zt_s16(svuint8_t zn) __arm_streaming __arm_in("zt0
// CHECK-LABEL: @test_svluti4_lane_zt_f16(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.luti4.lane.zt.x2.nxv8f16(i32 0, [[ZN:%.*]], i32 3)
-// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
-// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8)
-// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
-// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
-// CHECK-NEXT: ret { , } [[TMP5]]
+// CHECK-NEXT: ret { , } [[TMP0]]
//
// CPP-CHECK-LABEL: @_Z24test_svluti4_lane_zt_f16u11__SVUint8_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.luti4.lane.zt.x2.nxv8f16(i32 0, [[ZN:%.*]], i32 3)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8)
-// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: ret { , } [[TMP5]]
+// CPP-CHECK-NEXT: ret { , } [[TMP0]]
//
svfloat16x2_t test_svluti4_lane_zt_f16(svuint8_t zn) __arm_streaming __arm_in("zt0") {
  return svluti4_lane_zt_f16_x2(0, zn, 3);
@@ -152,27 +82,13 @@ svfloat16x2_t test_svluti4_lane_zt_f16(svuint8_t zn) __arm_streaming __arm_in("z
// CHECK-LABEL: @test_svluti4_lane_zt_bf16(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.luti4.lane.zt.x2.nxv8bf16(i32 0, [[ZN:%.*]], i32 3)
-// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP1]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
-// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8)
-// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
-// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
-// CHECK-NEXT: ret { , } [[TMP5]]
+// CHECK-NEXT: ret { , } [[TMP0]]
//
// CPP-CHECK-LABEL: @_Z25test_svluti4_lane_zt_bf16u11__SVUint8_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.luti4.lane.zt.x2.nxv8bf16(i32 0, [[ZN:%.*]], i32 3)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP1]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8)
-// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: ret { , } [[TMP5]]
+// CPP-CHECK-NEXT: ret { , } [[TMP0]]
//
svbfloat16x2_t test_svluti4_lane_zt_bf16(svuint8_t zn) __arm_streaming __arm_in("zt0") {
  return svluti4_lane_zt_bf16_x2(0, zn, 3);
@@ -180,27 +96,13 @@ svbfloat16x2_t test_svluti4_lane_zt_bf16(svuint8_t zn) __arm_streaming __arm_in(
// CHECK-LABEL: @test_svluti4_lane_zt_u32(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.luti4.lane.zt.x2.nxv4i32(i32 0, [[ZN:%.*]], i32 3)
-// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
-// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4)
-// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
-// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
-// CHECK-NEXT: ret { , } [[TMP5]]
+// CHECK-NEXT: ret { , } [[TMP0]]
//
// CPP-CHECK-LABEL: @_Z24test_svluti4_lane_zt_u32u11__SVUint8_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.luti4.lane.zt.x2.nxv4i32(i32 0, [[ZN:%.*]], i32 3)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4)
-// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: ret { , } [[TMP5]]
+// CPP-CHECK-NEXT: ret { , } [[TMP0]]
//
svuint32x2_t test_svluti4_lane_zt_u32(svuint8_t zn) __arm_streaming __arm_in("zt0") {
  return svluti4_lane_zt_u32_x2(0, zn, 3);
@@ -208,27 +110,13 @@ svuint32x2_t test_svluti4_lane_zt_u32(svuint8_t zn) __arm_streaming __arm_in("zt
// CHECK-LABEL: @test_svluti4_lane_zt_s32(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.luti4.lane.zt.x2.nxv4i32(i32 0, [[ZN:%.*]], i32 3)
-// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
-// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4)
-// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
-// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
-// CHECK-NEXT: ret { , } [[TMP5]]
+// CHECK-NEXT: ret { , } [[TMP0]]
//
// CPP-CHECK-LABEL: @_Z24test_svluti4_lane_zt_s32u11__SVUint8_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.luti4.lane.zt.x2.nxv4i32(i32 0, [[ZN:%.*]], i32 3)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4)
-// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: ret { , } [[TMP5]]
+// CPP-CHECK-NEXT: ret { , } [[TMP0]]
//
svint32x2_t test_svluti4_lane_zt_s32(svuint8_t zn) __arm_streaming __arm_in("zt0") {
  return svluti4_lane_zt_s32_x2(0, zn, 3);
@@ -236,27 +124,13 @@ svint32x2_t test_svluti4_lane_zt_s32(svuint8_t zn) __arm_streaming __arm_in("zt0
// CHECK-LABEL: @test_svluti4_lane_zt_f32(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.luti4.lane.zt.x2.nxv4f32(i32 0, [[ZN:%.*]], i32 3)
-// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
-// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4)
-// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
-// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
-// CHECK-NEXT: ret { , } [[TMP5]]
+// CHECK-NEXT: ret { , } [[TMP0]]
//
// CPP-CHECK-LABEL: @_Z24test_svluti4_lane_zt_f32u11__SVUint8_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.luti4.lane.zt.x2.nxv4f32(i32 0, [[ZN:%.*]], i32 3)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4)
-// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: ret { , } [[TMP5]]
+// CPP-CHECK-NEXT: ret { , } [[TMP0]]
//
svfloat32x2_t test_svluti4_lane_zt_f32(svuint8_t zn) __arm_streaming __arm_in("zt0") {
  return svluti4_lane_zt_f32_x2(0, zn, 3);
diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti4_lane_zt_x4.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti4_lane_zt_x4.c
index 16a7421326235..c4c89358c16f8 100644
--- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti4_lane_zt_x4.c
+++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti4_lane_zt_x4.c
@@ -11,36 +11,14 @@
// CHECK-LABEL: define dso_local { , , , } @test_svluti4_lane_zt_u16
// CHECK-SAME: ( [[ZN:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16
// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.luti4.lane.zt.x4.nxv8i16(i32 0, [[ZN]], i32 1)
-// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1
-// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8)
-// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2
-// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16)
-// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
-// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24)
-// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16
-// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
-// CHECK-NEXT: ret { , , , } [[TMP9]]
+// CHECK-NEXT: ret { , , , } [[TMP0]]
//
// CPP-CHECK-LABEL: define dso_local { , , , } @_Z24test_svluti4_lane_zt_u16u11__SVUint8_t
// CPP-CHECK-SAME: ( [[ZN:%.*]]) #[[ATTR0:[0-9]+]] {
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.luti4.lane.zt.x4.nxv8i16(i32 0, [[ZN]], i32 1)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8)
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2
-// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16)
-// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
-// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24)
-// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: ret { , , , } [[TMP9]]
+// CPP-CHECK-NEXT: ret { , , , } [[TMP0]]
//
svuint16x4_t test_svluti4_lane_zt_u16(svuint8_t zn) __arm_streaming __arm_in("zt0") {
  return svluti4_lane_zt_u16_x4(0, zn, 1);
@@ -49,36 +27,14 @@ svuint16x4_t test_svluti4_lane_zt_u16(svuint8_t zn) __arm_streaming __arm_in("zt
// CHECK-LABEL: define dso_local { , , , } @test_svluti4_lane_zt_f16
// CHECK-SAME: ( [[ZN:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16
// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.luti4.lane.zt.x4.nxv8f16(i32 0, [[ZN]], i32 1)
-// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1
-// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8)
-// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2
-// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16)
-// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
-// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24)
-// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16
-// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
-// CHECK-NEXT: ret { , , , } [[TMP9]]
+// CHECK-NEXT: ret { , , , } [[TMP0]]
//
// CPP-CHECK-LABEL: define dso_local { , , , } @_Z24test_svluti4_lane_zt_f16u11__SVUint8_t
// CPP-CHECK-SAME: ( [[ZN:%.*]]) #[[ATTR0]] {
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.luti4.lane.zt.x4.nxv8f16(i32 0, [[ZN]], i32 1)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8)
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2
-// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16)
-// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
-// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24)
-// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: ret { , , , } [[TMP9]]
+// CPP-CHECK-NEXT: ret { , , , } [[TMP0]]
//
svfloat16x4_t test_svluti4_lane_zt_f16(svuint8_t zn) __arm_streaming __arm_in("zt0") {
  return svluti4_lane_zt_f16_x4(0, zn, 1);
@@ -87,36 +43,14 @@ svfloat16x4_t test_svluti4_lane_zt_f16(svuint8_t zn) __arm_streaming __arm_in("z
// CHECK-LABEL: define dso_local { , , , } @test_svluti4_lane_zt_bf16
// CHECK-SAME: ( [[ZN:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16
// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.luti4.lane.zt.x4.nxv8bf16(i32 0, [[ZN]], i32 1)
-// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP1]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1
-// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8)
-// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2
-// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 16)
-// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
-// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 24)
-// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16
-// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
-// CHECK-NEXT: ret { , , , } [[TMP9]]
+// CHECK-NEXT: ret { , , , } [[TMP0]]
//
// CPP-CHECK-LABEL: define dso_local { , , , } @_Z25test_svluti4_lane_zt_bf16u11__SVUint8_t
// CPP-CHECK-SAME: ( [[ZN:%.*]]) #[[ATTR0]] {
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.luti4.lane.zt.x4.nxv8bf16(i32 0, [[ZN]], i32 1)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP1]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8)
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2
-// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 16)
-// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
-// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 24)
-// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: ret { , , , } [[TMP9]]
+// CPP-CHECK-NEXT: ret { , , , } [[TMP0]]
//
svbfloat16x4_t test_svluti4_lane_zt_bf16(svuint8_t zn) __arm_streaming __arm_in("zt0") {
  return svluti4_lane_zt_bf16_x4(0, zn, 1);
@@ -125,36 +59,14 @@ svbfloat16x4_t test_svluti4_lane_zt_bf16(svuint8_t zn) __arm_streaming __arm_in(
// CHECK-LABEL: define dso_local { , , , } @test_svluti4_lane_zt_s16
// CHECK-SAME: ( [[ZN:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16
// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.luti4.lane.zt.x4.nxv8i16(i32 0, [[ZN]], i32 1)
-// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1
-// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8)
-// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2
-// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16)
-// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
-// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24)
-// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16
-// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
-// CHECK-NEXT: ret { , , , } [[TMP9]]
+// CHECK-NEXT: ret { , , , } [[TMP0]]
//
// CPP-CHECK-LABEL: define dso_local { , , , } @_Z24test_svluti4_lane_zt_s16u11__SVUint8_t
// CPP-CHECK-SAME: ( [[ZN:%.*]]) #[[ATTR0]] {
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.luti4.lane.zt.x4.nxv8i16(i32 0, [[ZN]], i32 1)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8)
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2
-// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16)
-// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
-// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24)
-// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: ret { , , , } [[TMP9]]
+// CPP-CHECK-NEXT: ret { , , , } [[TMP0]]
//
svint16x4_t test_svluti4_lane_zt_s16(svuint8_t zn) __arm_streaming __arm_in("zt0") {
  return svluti4_lane_zt_s16_x4(0, zn, 1);
@@ -163,36 +75,14 @@ svint16x4_t test_svluti4_lane_zt_s16(svuint8_t zn) __arm_streaming __arm_in("zt0
// CHECK-LABEL: define dso_local { , , , } @test_svluti4_lane_zt_u32
// CHECK-SAME: ( [[ZN:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16
// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.luti4.lane.zt.x4.nxv4i32(i32 0, [[ZN]], i32 1)
-// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1
-// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4)
-// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2
-// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8)
-// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
-// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12)
-// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16
-// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
-// CHECK-NEXT: ret { , , , } [[TMP9]]
+// CHECK-NEXT: ret { , , , } [[TMP0]]
//
// CPP-CHECK-LABEL: define dso_local { , , , } @_Z24test_svluti4_lane_zt_u32u11__SVUint8_t
// CPP-CHECK-SAME: ( [[ZN:%.*]]) #[[ATTR0]] {
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.luti4.lane.zt.x4.nxv4i32(i32 0, [[ZN]], i32 1)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4)
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2
-// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8)
-// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
-// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12)
-// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: ret { , , , } [[TMP9]]
+// CPP-CHECK-NEXT: ret { , , , } [[TMP0]]
//
svuint32x4_t test_svluti4_lane_zt_u32(svuint8_t zn) __arm_streaming __arm_in("zt0") {
  return svluti4_lane_zt_u32_x4(0, zn, 1);
@@ -201,36 +91,14 @@ svuint32x4_t test_svluti4_lane_zt_u32(svuint8_t zn) __arm_streaming __arm_in("zt
// CHECK-LABEL: define dso_local { , , , } @test_svluti4_lane_zt_s32
// CHECK-SAME: ( [[ZN:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16
// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.luti4.lane.zt.x4.nxv4i32(i32 0, [[ZN]], i32 1)
-// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1
-// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4)
-// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2
-// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8)
-// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
-// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12)
-// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16
-// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
-// CHECK-NEXT: ret { , , , } [[TMP9]]
+// CHECK-NEXT: ret { , , , } [[TMP0]]
//
// CPP-CHECK-LABEL: define dso_local { , , , } @_Z24test_svluti4_lane_zt_s32u11__SVUint8_t
// CPP-CHECK-SAME: ( [[ZN:%.*]]) #[[ATTR0]] {
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.luti4.lane.zt.x4.nxv4i32(i32 0, [[ZN]], i32 1)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4)
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2
-// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8)
-// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
-// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12)
-// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: ret { , , , } [[TMP9]]
+// CPP-CHECK-NEXT: ret { , , , } [[TMP0]]
//
svint32x4_t test_svluti4_lane_zt_s32(svuint8_t zn) __arm_streaming __arm_in("zt0") {
  return svluti4_lane_zt_s32_x4(0, zn, 1);
@@ -239,36 +107,14 @@ svint32x4_t test_svluti4_lane_zt_s32(svuint8_t zn) __arm_streaming __arm_in("zt0
// CHECK-LABEL: define dso_local { , , , } @test_svluti4_lane_zt_f32
// CHECK-SAME: ( [[ZN:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16
// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.luti4.lane.zt.x4.nxv4f32(i32 0, [[ZN]], i32 1)
-// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1
-// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4)
-// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2
-// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8)
-// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
-// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12)
-// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16
-// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
-// CHECK-NEXT: ret { , , , } [[TMP9]]
+// CHECK-NEXT: ret { , , , } [[TMP0]]
//
// CPP-CHECK-LABEL: define dso_local { , , , } @_Z24test_svluti4_lane_zt_f32u11__SVUint8_t
// CPP-CHECK-SAME: ( [[ZN:%.*]]) #[[ATTR0]] {
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.luti4.lane.zt.x4.nxv4f32(i32 0, [[ZN]], i32 1)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4)
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2
-// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8)
-// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
-// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12)
-// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: ret { , , , } [[TMP9]]
+// CPP-CHECK-NEXT: ret { , , , } [[TMP0]]
//
svfloat32x4_t test_svluti4_lane_zt_f32(svuint8_t zn) __arm_streaming __arm_in("zt0") {
  return svluti4_lane_zt_f32_x4(0, zn, 1);
diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_max.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_max.c
index efc68c0b42334..5d57ffb9bdf8c 100644
--- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_max.c
+++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_max.c
@@ -18,27 +18,13 @@
// CHECK-LABEL: @test_svmax_single_s8_x2(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.smax.single.x2.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
-// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16)
-// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
-// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
-// CHECK-NEXT: ret { , } [[TMP5]]
+// CHECK-NEXT: ret { , } [[TMP0]]
//
// CPP-CHECK-LABEL: @_Z23test_svmax_single_s8_x210svint8x2_tu10__SVInt8_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.smax.single.x2.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16)
-// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: ret { , } [[TMP5]]
+// CPP-CHECK-NEXT: ret { , } [[TMP0]]
//
svint8x2_t test_svmax_single_s8_x2(svint8x2_t zdn, svint8_t zm) __arm_streaming {
  return SVE_ACLE_FUNC(svmax,_single_s8_x2)(zdn, zm);
@@ -46,27 +32,13 @@ svint8x2_t test_svmax_single_s8_x2(svint8x2_t zdn, svint8_t zm) __arm_streaming
// CHECK-LABEL: @test_svmax_single_s16_x2(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.smax.single.x2.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
-// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8)
-// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
-// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
-// CHECK-NEXT: ret { , } [[TMP5]]
+// CHECK-NEXT: ret { , } [[TMP0]]
//
// CPP-CHECK-LABEL: @_Z24test_svmax_single_s16_x211svint16x2_tu11__SVInt16_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.smax.single.x2.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8)
-// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: ret { , } [[TMP5]]
+// CPP-CHECK-NEXT: ret { , } [[TMP0]]
//
svint16x2_t test_svmax_single_s16_x2(svint16x2_t zdn, svint16_t zm) __arm_streaming {
  return SVE_ACLE_FUNC(svmax,_single_s16_x2)(zdn, zm);
@@ -74,27 +46,13 @@ svint16x2_t test_svmax_single_s16_x2(svint16x2_t zdn, svint16_t zm) __arm_stream
// CHECK-LABEL: @test_svmax_single_s32_x2(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.smax.single.x2.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
-// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4)
-// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
-// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
-// CHECK-NEXT: ret { , } [[TMP5]]
+// CHECK-NEXT: ret { , } [[TMP0]]
//
// CPP-CHECK-LABEL: @_Z24test_svmax_single_s32_x211svint32x2_tu11__SVInt32_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.smax.single.x2.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4)
-// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: ret { , } [[TMP5]]
+// CPP-CHECK-NEXT: ret { , } [[TMP0]]
//
svint32x2_t test_svmax_single_s32_x2(svint32x2_t zdn, svint32_t zm) __arm_streaming {
  return SVE_ACLE_FUNC(svmax,_single_s32_x2)(zdn, zm);
@@ -102,27 +60,13 @@ svint32x2_t test_svmax_single_s32_x2(svint32x2_t zdn, svint32_t zm) __arm_stream
// CHECK-LABEL: @test_svmax_single_s64_x2(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.smax.single.x2.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
-// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2)
-// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
-// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
-// CHECK-NEXT: ret { , } [[TMP5]]
+// CHECK-NEXT: ret { , } [[TMP0]]
//
// CPP-CHECK-LABEL: @_Z24test_svmax_single_s64_x211svint64x2_tu11__SVInt64_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.smax.single.x2.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2)
-// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: ret { , } [[TMP5]]
+// CPP-CHECK-NEXT: ret { , } [[TMP0]]
//
svint64x2_t test_svmax_single_s64_x2(svint64x2_t zdn, svint64_t zm) __arm_streaming {
  return SVE_ACLE_FUNC(svmax,_single_s64_x2)(zdn, zm);
@@ -130,27 +74,13 @@ svint64x2_t test_svmax_single_s64_x2(svint64x2_t zdn, svint64_t zm) __arm_stream
// CHECK-LABEL: @test_svmax_single_u8_x2(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.umax.single.x2.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
-// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16)
-// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
-// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
-// CHECK-NEXT: ret { , } [[TMP5]]
+// CHECK-NEXT: ret { , } [[TMP0]]
//
// CPP-CHECK-LABEL: @_Z23test_svmax_single_u8_x211svuint8x2_tu11__SVUint8_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.umax.single.x2.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16)
-// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: ret { , } [[TMP5]]
+// CPP-CHECK-NEXT: ret { , } [[TMP0]]
//
svuint8x2_t test_svmax_single_u8_x2(svuint8x2_t zdn, svuint8_t zm) __arm_streaming {
  return SVE_ACLE_FUNC(svmax,_single_u8_x2)(zdn, zm);
@@ -158,27 +88,13 @@ svuint8x2_t test_svmax_single_u8_x2(svuint8x2_t zdn, svuint8_t zm) __arm_streami
// CHECK-LABEL: @test_svmax_single_u16_x2(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.umax.single.x2.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
-// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8)
-// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
-// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
-// CHECK-NEXT: ret { , } [[TMP5]]
+// CHECK-NEXT: ret { , } [[TMP0]]
//
// CPP-CHECK-LABEL: @_Z24test_svmax_single_u16_x212svuint16x2_tu12__SVUint16_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.umax.single.x2.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8)
-// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: ret { , } [[TMP5]]
+// CPP-CHECK-NEXT: ret { , } [[TMP0]]
//
svuint16x2_t test_svmax_single_u16_x2(svuint16x2_t zdn, svuint16_t zm) __arm_streaming {
  return SVE_ACLE_FUNC(svmax,_single_u16_x2)(zdn, zm);
@@ -186,27 +102,13 @@ svuint16x2_t test_svmax_single_u16_x2(svuint16x2_t zdn, svuint16_t zm) __arm_str
// CHECK-LABEL: @test_svmax_single_u32_x2(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.umax.single.x2.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
-// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4)
-// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
-// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
-// CHECK-NEXT: ret { , } [[TMP5]]
+// CHECK-NEXT: ret { , } [[TMP0]]
//
// CPP-CHECK-LABEL: @_Z24test_svmax_single_u32_x212svuint32x2_tu12__SVUint32_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.umax.single.x2.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4)
-// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: ret { , } [[TMP5]]
+// CPP-CHECK-NEXT: ret { , } [[TMP0]]
//
svuint32x2_t test_svmax_single_u32_x2(svuint32x2_t zdn, svuint32_t zm) __arm_streaming {
  return SVE_ACLE_FUNC(svmax,_single_u32_x2)(zdn, zm);
@@ -214,27 +116,13 @@ svuint32x2_t test_svmax_single_u32_x2(svuint32x2_t zdn, svuint32_t zm) __arm_str
// CHECK-LABEL: @test_svmax_single_u64_x2(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.umax.single.x2.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
-// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2)
-// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
-// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
-// CHECK-NEXT: ret { , } [[TMP5]]
+// CHECK-NEXT: ret { , } [[TMP0]]
//
// CPP-CHECK-LABEL: @_Z24test_svmax_single_u64_x212svuint64x2_tu12__SVUint64_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.umax.single.x2.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2)
-// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: ret { , } [[TMP5]]
+// CPP-CHECK-NEXT: ret { , } [[TMP0]]
//
svuint64x2_t test_svmax_single_u64_x2(svuint64x2_t zdn, svuint64_t zm) __arm_streaming {
  return SVE_ACLE_FUNC(svmax,_single_u64_x2)(zdn, zm);
@@ -242,27 +130,13 @@ svuint64x2_t test_svmax_single_u64_x2(svuint64x2_t zdn, svuint64_t zm) __arm_str
// CHECK-LABEL: @test_svmax_single_bf16_x2(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmax.single.x2.nxv8bf16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP1]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
-// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8)
-// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
-// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
-// CHECK-NEXT: ret { , } [[TMP5]]
+// CHECK-NEXT: ret { , } [[TMP0]]
//
// CPP-CHECK-LABEL: @_Z25test_svmax_single_bf16_x214svbfloat16x2_tu14__SVBfloat16_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmax.single.x2.nxv8bf16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP1]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8)
-// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: ret { , } [[TMP5]]
+// CPP-CHECK-NEXT: ret { , } [[TMP0]]
//
svbfloat16x2_t test_svmax_single_bf16_x2(svbfloat16x2_t zdn, svbfloat16_t zm) __arm_streaming {
  return SVE_ACLE_FUNC(svmax,_single_bf16_x2)(zdn, zm);
@@ -270,27 +144,13 @@ svbfloat16x2_t test_svmax_single_bf16_x2(svbfloat16x2_t zdn, svbfloat16_t zm) __
// CHECK-LABEL: @test_svmax_single_f16_x2(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmax.single.x2.nxv8f16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
-// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8)
-// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
-// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
-// CHECK-NEXT: ret { , } [[TMP5]]
+// CHECK-NEXT: ret { , } [[TMP0]]
//
// CPP-CHECK-LABEL: @_Z24test_svmax_single_f16_x213svfloat16x2_tu13__SVFloat16_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmax.single.x2.nxv8f16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8)
-// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: ret { , } [[TMP5]]
+// CPP-CHECK-NEXT: ret { , } [[TMP0]]
//
svfloat16x2_t test_svmax_single_f16_x2(svfloat16x2_t zdn, svfloat16_t zm) __arm_streaming {
  return SVE_ACLE_FUNC(svmax,_single_f16_x2)(zdn, zm);
@@ -298,27 +158,13 @@ svfloat16x2_t test_svmax_single_f16_x2(svfloat16x2_t zdn, svfloat16_t zm) __arm_
// CHECK-LABEL: @test_svmax_single_f32_x2(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmax.single.x2.nxv4f32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
-// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4)
-// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
-// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
-// CHECK-NEXT: ret { , } [[TMP5]]
+// CHECK-NEXT: ret { , } [[TMP0]]
//
// CPP-CHECK-LABEL: @_Z24test_svmax_single_f32_x213svfloat32x2_tu13__SVFloat32_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmax.single.x2.nxv4f32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4)
-// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: ret { , } [[TMP5]]
+// CPP-CHECK-NEXT: ret { , } [[TMP0]]
//
svfloat32x2_t test_svmax_single_f32_x2(svfloat32x2_t zdn, svfloat32_t zm) __arm_streaming {
  return SVE_ACLE_FUNC(svmax,_single_f32_x2)(zdn, zm);
@@ -326,27 +172,13 @@ svfloat32x2_t test_svmax_single_f32_x2(svfloat32x2_t zdn, svfloat32_t zm) __arm_
// CHECK-LABEL: @test_svmax_single_f64_x2(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmax.single.x2.nxv2f64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP1]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
-// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2)
-// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
-// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
-// CHECK-NEXT: ret { , } [[TMP5]]
+// CHECK-NEXT: ret { , } [[TMP0]]
//
// CPP-CHECK-LABEL: @_Z24test_svmax_single_f64_x213svfloat64x2_tu13__SVFloat64_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmax.single.x2.nxv2f64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP1]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2)
-// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: ret { , } [[TMP5]]
+// CPP-CHECK-NEXT: ret { , } [[TMP0]]
//
svfloat64x2_t test_svmax_single_f64_x2(svfloat64x2_t zdn, svfloat64_t zm) __arm_streaming {
  return SVE_ACLE_FUNC(svmax,_single_f64_x2)(zdn, zm);
@@ -356,35 +188,13 @@ svfloat64x2_t test_svmax_single_f64_x2(svfloat64x2_t zdn, svfloat64_t zm) __arm_
// CHECK-LABEL: @test_svmax_single_s8_x4(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16
// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.smax.single.x4.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1
-// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16)
-// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2
-// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32)
-// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
-// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48)
-// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16
-// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
-// CHECK-NEXT: ret { , , , } [[TMP9]]
+// CHECK-NEXT: ret { , , , } [[TMP0]]
//
// CPP-CHECK-LABEL: @_Z23test_svmax_single_s8_x410svint8x4_tu10__SVInt8_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.smax.single.x4.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16)
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2
-// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32)
-// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
-// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48)
-// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: ret { , , , } [[TMP9]]
+// CPP-CHECK-NEXT: ret { , , , } [[TMP0]]
//
svint8x4_t test_svmax_single_s8_x4(svint8x4_t zdn, svint8_t zm) __arm_streaming {
  return SVE_ACLE_FUNC(svmax,_single_s8_x4)(zdn, zm);
@@ -392,35 +202,13 @@ svint8x4_t test_svmax_single_s8_x4(svint8x4_t zdn, svint8_t zm) __arm_streaming
// CHECK-LABEL: @test_svmax_single_s16_x4(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16
// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.smax.single.x4.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1
-// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8)
-// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2
-// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16)
-// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
-// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24)
-// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16
-// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
-// CHECK-NEXT: ret { , , , } [[TMP9]]
+// CHECK-NEXT: ret { , , , } [[TMP0]]
//
// CPP-CHECK-LABEL: @_Z24test_svmax_single_s16_x411svint16x4_tu11__SVInt16_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.smax.single.x4.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8)
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2
-// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16)
-// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
-// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24)
-// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: ret { , , , } [[TMP9]]
+// CPP-CHECK-NEXT: ret { , , , } [[TMP0]]
//
svint16x4_t test_svmax_single_s16_x4(svint16x4_t zdn, svint16_t zm) __arm_streaming {
  return SVE_ACLE_FUNC(svmax,_single_s16_x4)(zdn, zm);
@@ -428,35 +216,13 @@ svint16x4_t test_svmax_single_s16_x4(svint16x4_t zdn, svint16_t zm) __arm_stream
// CHECK-LABEL: @test_svmax_single_s32_x4(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16
// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.smax.single.x4.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1
-// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4)
-// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2
-// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8)
-// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
-// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12)
-// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16
-// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
-// CHECK-NEXT: ret { , , , } [[TMP9]]
+// CHECK-NEXT: ret { , , , } [[TMP0]]
//
// CPP-CHECK-LABEL: @_Z24test_svmax_single_s32_x411svint32x4_tu11__SVInt32_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16
@llvm.aarch64.sve.smax.single.x4.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint32x4_t test_svmax_single_s32_x4(svint32x4_t zdn, svint32_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmax,_single_s32_x4)(zdn, zm); @@ -464,35 +230,13 @@ svint32x4_t test_svmax_single_s32_x4(svint32x4_t zdn, svint32_t zm) __arm_stream // CHECK-LABEL: @test_svmax_single_s64_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.smax.single.x4.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svmax_single_s64_x411svint64x4_tu11__SVInt64_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.smax.single.x4.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call 
@llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint64x4_t test_svmax_single_s64_x4(svint64x4_t zdn, svint64_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmax,_single_s64_x4)(zdn, zm); @@ -500,35 +244,13 @@ svint64x4_t test_svmax_single_s64_x4(svint64x4_t zdn, svint64_t zm) __arm_stream // CHECK-LABEL: @test_svmax_single_u8_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.umax.single.x4.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z23test_svmax_single_u8_x411svuint8x4_tu11__SVUint8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.umax.single.x4.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint8x4_t test_svmax_single_u8_x4(svuint8x4_t zdn, svuint8_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmax,_single_u8_x4)(zdn, zm); @@ -536,35 +258,13 @@ svuint8x4_t test_svmax_single_u8_x4(svuint8x4_t zdn, svuint8_t zm) __arm_streami // CHECK-LABEL: @test_svmax_single_u16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.umax.single.x4.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], 
[[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svmax_single_u16_x412svuint16x4_tu12__SVUint16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.umax.single.x4.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint16x4_t test_svmax_single_u16_x4(svuint16x4_t zdn, svuint16_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmax,_single_u16_x4)(zdn, zm); @@ -572,35 +272,13 @@ svuint16x4_t test_svmax_single_u16_x4(svuint16x4_t zdn, svuint16_t zm) __arm_str // CHECK-LABEL: @test_svmax_single_u32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.umax.single.x4.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], 
align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svmax_single_u32_x412svuint32x4_tu12__SVUint32_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.umax.single.x4.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint32x4_t test_svmax_single_u32_x4(svuint32x4_t zdn, svuint32_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmax,_single_u32_x4)(zdn, zm); @@ -608,35 +286,13 @@ svuint32x4_t test_svmax_single_u32_x4(svuint32x4_t zdn, svuint32_t zm) __arm_str // CHECK-LABEL: @test_svmax_single_u64_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.umax.single.x4.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svmax_single_u64_x412svuint64x4_tu12__SVUint64_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.umax.single.x4.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: 
[[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint64x4_t test_svmax_single_u64_x4(svuint64x4_t zdn, svuint64_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmax,_single_u64_x4)(zdn, zm); @@ -644,35 +300,13 @@ svuint64x4_t test_svmax_single_u64_x4(svuint64x4_t zdn, svuint64_t zm) __arm_str // CHECK-LABEL: @test_svmax_single_bf16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmax.single.x4.nxv8bf16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z25test_svmax_single_bf16_x414svbfloat16x4_tu14__SVBfloat16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmax.single.x4.nxv8bf16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svbfloat16x4_t test_svmax_single_bf16_x4(svbfloat16x4_t zdn, svbfloat16_t zm) __arm_streaming 
{ return SVE_ACLE_FUNC(svmax,_single_bf16_x4)(zdn, zm); @@ -680,35 +314,13 @@ svbfloat16x4_t test_svmax_single_bf16_x4(svbfloat16x4_t zdn, svbfloat16_t zm) __ // CHECK-LABEL: @test_svmax_single_f16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmax.single.x4.nxv8f16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svmax_single_f16_x413svfloat16x4_tu13__SVFloat16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmax.single.x4.nxv8f16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat16x4_t test_svmax_single_f16_x4(svfloat16x4_t zdn, svfloat16_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmax,_single_f16_x4)(zdn, zm); @@ -716,35 +328,13 @@ svfloat16x4_t test_svmax_single_f16_x4(svfloat16x4_t zdn, svfloat16_t zm) __arm_ // CHECK-LABEL: @test_svmax_single_f32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmax.single.x4.nxv4f32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call 
@llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svmax_single_f32_x413svfloat32x4_tu13__SVFloat32_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmax.single.x4.nxv4f32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat32x4_t test_svmax_single_f32_x4(svfloat32x4_t zdn, svfloat32_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmax,_single_f32_x4)(zdn, zm); @@ -752,35 +342,13 @@ svfloat32x4_t test_svmax_single_f32_x4(svfloat32x4_t zdn, svfloat32_t zm) __arm_ // CHECK-LABEL: @test_svmax_single_f64_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmax.single.x4.nxv2f64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svmax_single_f64_x413svfloat64x4_tu13__SVFloat64_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmax.single.x4.nxv2f64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2)
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2
-// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 4)
-// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
-// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6)
-// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: ret { , , , } [[TMP9]]
+// CPP-CHECK-NEXT: ret { , , , } [[TMP0]]
//
svfloat64x4_t test_svmax_single_f64_x4(svfloat64x4_t zdn, svfloat64_t zm) __arm_streaming {
  return SVE_ACLE_FUNC(svmax,_single_f64_x4)(zdn, zm);
}
@@ -790,27 +358,13 @@ svfloat64x4_t test_svmax_single_f64_x4(svfloat64x4_t zdn, svfloat64_t zm) __arm_
// CHECK-LABEL: @test_svmax_s8_x2(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca { <vscale x 16 x i8>, <vscale x 16 x i8> }, align 16
// CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.smax.x2.nxv16i8(<vscale x 16 x i8> [[ZDN_COERCE0:%.*]], <vscale x 16 x i8> [[ZDN_COERCE1:%.*]], <vscale x 16 x i8> [[ZM_COERCE0:%.*]], <vscale x 16 x i8> [[ZM_COERCE1:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 0
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> poison, <vscale x 16 x i8> [[TMP1]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 1
-// CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]], i64 16)
-// CHECK-NEXT: store <vscale x 32 x i8> [[TMP4]], ptr [[RETVAL]], align 16
-// CHECK-NEXT: [[TMP5:%.*]] = load { <vscale x 16 x i8>, <vscale x 16 x i8> }, ptr [[RETVAL]], align 16
-// CHECK-NEXT: ret { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP5]]
+// CHECK-NEXT: ret { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]]
//
// CPP-CHECK-LABEL: @_Z16test_svmax_s8_x210svint8x2_tS_(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { <vscale x 16 x i8>, <vscale x 16 x i8> }, align 16
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.smax.x2.nxv16i8(<vscale x 16 x i8> [[ZDN_COERCE0:%.*]], <vscale x 16 x i8> [[ZDN_COERCE1:%.*]], <vscale x 16 x i8> [[ZM_COERCE0:%.*]], <vscale x 16 x i8> [[ZM_COERCE1:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 0
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> poison, <vscale x 16 x i8> [[TMP1]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 1
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]], i64 16)
-// CPP-CHECK-NEXT: store <vscale x 32 x i8> [[TMP4]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { <vscale x 16 x i8>, <vscale x 16 x i8> }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: ret { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP5]]
+// CPP-CHECK-NEXT: ret { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]]
//
svint8x2_t test_svmax_s8_x2(svint8x2_t zdn, svint8x2_t zm) __arm_streaming {
  return SVE_ACLE_FUNC(svmax,_s8_x2)(zdn, zm);
}
@@ -818,27 +372,13 @@ svint8x2_t test_svmax_s8_x2(svint8x2_t zdn, svint8x2_t zm) __arm_streaming {
// CHECK-LABEL: @test_svmax_s16_x2(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca { <vscale x 8 x i16>, <vscale x 8 x i16> }, align 16
// CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.smax.x2.nxv8i16(<vscale x 8 x i16> [[ZDN_COERCE0:%.*]], <vscale x 8 x i16> [[ZDN_COERCE1:%.*]], <vscale x 8 x i16> [[ZM_COERCE0:%.*]], <vscale x 8 x i16> [[ZM_COERCE1:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], 0
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 16 x i16> @llvm.vector.insert.nxv16i16.nxv8i16(<vscale x 16 x i16> poison, <vscale x 8 x i16> [[TMP1]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], 1
-// CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 16 x i16> @llvm.vector.insert.nxv16i16.nxv8i16(<vscale x 16 x i16> [[TMP2]], <vscale x 8 x i16> [[TMP3]], i64 8)
-// CHECK-NEXT: store <vscale x 16 x i16> [[TMP4]], ptr [[RETVAL]], align 16
-// CHECK-NEXT: [[TMP5:%.*]] = load { <vscale x 8 x i16>, <vscale x 8 x i16> }, ptr [[RETVAL]], align 16
-// CHECK-NEXT: ret { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP5]]
+// CHECK-NEXT: ret { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]]
//
// CPP-CHECK-LABEL: @_Z17test_svmax_s16_x211svint16x2_tS_(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { <vscale x 8 x i16>, <vscale x 8 x i16> }, align 16
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.smax.x2.nxv8i16(<vscale x 8 x i16> [[ZDN_COERCE0:%.*]], <vscale x 8 x i16> [[ZDN_COERCE1:%.*]], <vscale x 8 x i16> [[ZM_COERCE0:%.*]], <vscale x 8 x i16> [[ZM_COERCE1:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], 0
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 16 x i16> @llvm.vector.insert.nxv16i16.nxv8i16(<vscale x 16 x i16> poison, <vscale x 8 x i16> [[TMP1]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], 1
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 16 x i16> @llvm.vector.insert.nxv16i16.nxv8i16(<vscale x 16 x i16> [[TMP2]], <vscale x 8 x i16> [[TMP3]], i64 8)
-// CPP-CHECK-NEXT: store <vscale x 16 x i16> [[TMP4]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { <vscale x 8 x i16>, <vscale x 8 x i16> }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: ret { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP5]]
+// CPP-CHECK-NEXT: ret { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]]
//
svint16x2_t test_svmax_s16_x2(svint16x2_t zdn, svint16x2_t zm) __arm_streaming {
  return SVE_ACLE_FUNC(svmax,_s16_x2)(zdn, zm);
}
@@ -846,27 +386,13 @@ svint16x2_t test_svmax_s16_x2(svint16x2_t zdn, svint16x2_t zm) __arm_streaming {
// CHECK-LABEL: @test_svmax_s32_x2(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca { <vscale x 4 x i32>, <vscale x 4 x i32> }, align 16
// CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.smax.x2.nxv4i32(<vscale x 4 x i32> [[ZDN_COERCE0:%.*]], <vscale x 4 x i32> [[ZDN_COERCE1:%.*]], <vscale x 4 x i32> [[ZM_COERCE0:%.*]], <vscale x 4 x i32> [[ZM_COERCE1:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], 0
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x i32> @llvm.vector.insert.nxv8i32.nxv4i32(<vscale x 8 x i32> poison, <vscale x 4 x i32> [[TMP1]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], 1
-// CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 8 x i32> @llvm.vector.insert.nxv8i32.nxv4i32(<vscale x 8 x i32> [[TMP2]], <vscale x 4 x i32> [[TMP3]], i64 4)
-// CHECK-NEXT: store <vscale x 8 x i32> [[TMP4]], ptr [[RETVAL]], align 16
-// CHECK-NEXT: [[TMP5:%.*]] = load { <vscale x 4 x i32>, <vscale x 4 x i32> }, ptr [[RETVAL]], align 16
-// CHECK-NEXT: ret { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP5]]
+// CHECK-NEXT: ret { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]]
//
// CPP-CHECK-LABEL: @_Z17test_svmax_s32_x211svint32x2_tS_(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { <vscale x 4 x i32>, <vscale x 4 x i32> }, align 16
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.smax.x2.nxv4i32(<vscale x 4 x i32> [[ZDN_COERCE0:%.*]], <vscale x 4 x i32> [[ZDN_COERCE1:%.*]], <vscale x 4 x i32> [[ZM_COERCE0:%.*]], <vscale x 4 x i32> [[ZM_COERCE1:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], 0
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x i32> @llvm.vector.insert.nxv8i32.nxv4i32(<vscale x 8 x i32> poison, <vscale x 4 x i32> [[TMP1]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], 1
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 8 x i32> @llvm.vector.insert.nxv8i32.nxv4i32(<vscale x 8 x i32> [[TMP2]], <vscale x 4 x i32> [[TMP3]], i64 4)
-// CPP-CHECK-NEXT: store <vscale x 8 x i32> [[TMP4]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { <vscale x 4 x i32>, <vscale x 4 x i32> }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: ret { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP5]]
+// CPP-CHECK-NEXT: ret { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]]
//
svint32x2_t test_svmax_s32_x2(svint32x2_t zdn, svint32x2_t zm)
__arm_streaming { // CHECK-LABEL: @test_svmax_s64_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.smax.x2.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svmax_s64_x211svint64x2_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.smax.x2.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint64x2_t test_svmax_s64_x2(svint64x2_t zdn, svint64x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmax,_s64_x2)(zdn, zm); @@ -902,27 +414,13 @@ svint64x2_t test_svmax_s64_x2(svint64x2_t zdn, svint64x2_t zm) __arm_streaming { // CHECK-LABEL: @test_svmax_u8_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.umax.x2.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z16test_svmax_u8_x211svuint8x2_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.umax.x2.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { 
, } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint8x2_t test_svmax_u8_x2(svuint8x2_t zdn, svuint8x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmax,_u8_x2)(zdn, zm); @@ -930,27 +428,13 @@ svuint8x2_t test_svmax_u8_x2(svuint8x2_t zdn, svuint8x2_t zm) __arm_streaming { // CHECK-LABEL: @test_svmax_u16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.umax.x2.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svmax_u16_x212svuint16x2_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.umax.x2.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint16x2_t test_svmax_u16_x2(svuint16x2_t zdn, svuint16x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmax,_u16_x2)(zdn, zm); @@ -958,27 +442,13 @@ svuint16x2_t test_svmax_u16_x2(svuint16x2_t zdn, svuint16x2_t zm) __arm_streamin // CHECK-LABEL: @test_svmax_u32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.umax.x2.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svmax_u32_x212svuint32x2_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.umax.x2.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } 
[[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint32x2_t test_svmax_u32_x2(svuint32x2_t zdn, svuint32x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmax,_u32_x2)(zdn, zm); @@ -986,27 +456,13 @@ svuint32x2_t test_svmax_u32_x2(svuint32x2_t zdn, svuint32x2_t zm) __arm_streamin // CHECK-LABEL: @test_svmax_u64_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.umax.x2.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svmax_u64_x212svuint64x2_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.umax.x2.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint64x2_t test_svmax_u64_x2(svuint64x2_t zdn, svuint64x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmax,_u64_x2)(zdn, zm); @@ -1014,27 +470,13 @@ svuint64x2_t test_svmax_u64_x2(svuint64x2_t zdn, svuint64x2_t zm) __arm_streamin // CHECK-LABEL: @test_svmax_bf16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmax.x2.nxv8bf16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svmax_bf16_x214svbfloat16x2_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmax.x2.nxv8bf16( [[ZDN_COERCE0:%.*]], 
[[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svbfloat16x2_t test_svmax_bf16_x2(svbfloat16x2_t zdn, svbfloat16x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmax,_bf16_x2)(zdn, zm); @@ -1042,27 +484,13 @@ svbfloat16x2_t test_svmax_bf16_x2(svbfloat16x2_t zdn, svbfloat16x2_t zm) __arm_s // CHECK-LABEL: @test_svmax_f16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmax.x2.nxv8f16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svmax_f16_x213svfloat16x2_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmax.x2.nxv8f16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat16x2_t test_svmax_f16_x2(svfloat16x2_t zdn, svfloat16x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmax,_f16_x2)(zdn, zm); @@ -1070,27 +498,13 @@ svfloat16x2_t test_svmax_f16_x2(svfloat16x2_t zdn, svfloat16x2_t zm) __arm_strea // CHECK-LABEL: @test_svmax_f32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmax.x2.nxv4f32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } 
[[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svmax_f32_x213svfloat32x2_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmax.x2.nxv4f32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat32x2_t test_svmax_f32_x2(svfloat32x2_t zdn, svfloat32x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmax,_f32_x2)(zdn, zm); @@ -1098,27 +512,13 @@ svfloat32x2_t test_svmax_f32_x2(svfloat32x2_t zdn, svfloat32x2_t zm) __arm_strea // CHECK-LABEL: @test_svmax_f64_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmax.x2.nxv2f64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svmax_f64_x213svfloat64x2_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmax.x2.nxv2f64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat64x2_t test_svmax_f64_x2(svfloat64x2_t zdn, svfloat64x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmax,_f64_x2)(zdn, zm); @@ -1128,35 +528,13 @@ svfloat64x2_t test_svmax_f64_x2(svfloat64x2_t zdn, svfloat64x2_t zm) __arm_strea // CHECK-LABEL: @test_svmax_s8_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.smax.x4.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call 
@llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z16test_svmax_s8_x410svint8x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.smax.x4.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint8x4_t test_svmax_s8_x4(svint8x4_t zdn, svint8x4_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmax,_s8_x4)(zdn, zm); @@ -1164,35 +542,13 @@ svint8x4_t test_svmax_s8_x4(svint8x4_t zdn, svint8x4_t zm) __arm_streaming { // CHECK-LABEL: @test_svmax_s16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.smax.x4.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: 
ret { , , , } [[TMP9]]
+// CHECK-NEXT: ret { , , , } [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z17test_svmax_s16_x411svint16x4_tS_(
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16
 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.smax.x4.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8)
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2
-// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16)
-// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
-// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24)
-// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: ret { , , , } [[TMP9]]
+// CPP-CHECK-NEXT: ret { , , , } [[TMP0]]
 //
 svint16x4_t test_svmax_s16_x4(svint16x4_t zdn, svint16x4_t zm) __arm_streaming {
   return SVE_ACLE_FUNC(svmax,_s16_x4)(zdn, zm);
@@ -1200,35 +556,13 @@ svint16x4_t test_svmax_s16_x4(svint16x4_t zdn, svint16x4_t zm) __arm_streaming {
 // CHECK-LABEL: @test_svmax_s32_x4(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16
 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.smax.x4.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1
-// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4)
-// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2
-// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8)
-// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
-// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12)
-// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16
-// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
-// CHECK-NEXT: ret { , , , } [[TMP9]]
+// CHECK-NEXT: ret { , , , } [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z17test_svmax_s32_x411svint32x4_tS_(
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16
 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.smax.x4.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4)
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2
-// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8)
-// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
-// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12)
-// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: ret { , , , } [[TMP9]]
+// CPP-CHECK-NEXT: ret { , , , } [[TMP0]]
 //
 svint32x4_t test_svmax_s32_x4(svint32x4_t zdn, svint32x4_t zm) __arm_streaming {
   return SVE_ACLE_FUNC(svmax,_s32_x4)(zdn, zm);
@@ -1236,35 +570,13 @@ svint32x4_t test_svmax_s32_x4(svint32x4_t zdn, svint32x4_t zm) __arm_streaming {
 // CHECK-LABEL: @test_svmax_s64_x4(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16
 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.smax.x4.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1
-// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2)
-// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2
-// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4)
-// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
-// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6)
-// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16
-// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
-// CHECK-NEXT: ret { , , , } [[TMP9]]
+// CHECK-NEXT: ret { , , , } [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z17test_svmax_s64_x411svint64x4_tS_(
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16
 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.smax.x4.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2)
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2
-// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4)
-// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
-// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6)
-// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: ret { , , , } [[TMP9]]
+// CPP-CHECK-NEXT: ret { , , , } [[TMP0]]
 //
 svint64x4_t test_svmax_s64_x4(svint64x4_t zdn, svint64x4_t zm) __arm_streaming {
   return SVE_ACLE_FUNC(svmax,_s64_x4)(zdn, zm);
@@ -1272,35 +584,13 @@ svint64x4_t test_svmax_s64_x4(svint64x4_t zdn, svint64x4_t zm) __arm_streaming {
 // CHECK-LABEL: @test_svmax_u8_x4(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16
 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.umax.x4.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1
-// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16)
-// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2
-// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32)
-// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
-// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48)
-// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16
-// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
-// CHECK-NEXT: ret { , , , } [[TMP9]]
+// CHECK-NEXT: ret { , , , } [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z16test_svmax_u8_x411svuint8x4_tS_(
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16
 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.umax.x4.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16)
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2
-// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32)
-// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
-// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48)
-// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: ret { , , , } [[TMP9]]
+// CPP-CHECK-NEXT: ret { , , , } [[TMP0]]
 //
 svuint8x4_t test_svmax_u8_x4(svuint8x4_t zdn, svuint8x4_t zm) __arm_streaming {
   return SVE_ACLE_FUNC(svmax,_u8_x4)(zdn, zm);
@@ -1308,35 +598,13 @@ svuint8x4_t test_svmax_u8_x4(svuint8x4_t zdn, svuint8x4_t zm) __arm_streaming {
 // CHECK-LABEL: @test_svmax_u16_x4(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16
 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.umax.x4.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1
-// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8)
-// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2
-// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16)
-// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
-// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24)
-// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16
-// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
-// CHECK-NEXT: ret { , , , } [[TMP9]]
+// CHECK-NEXT: ret { , , , } [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z17test_svmax_u16_x412svuint16x4_tS_(
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16
 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.umax.x4.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8)
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2
-// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16)
-// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
-// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24)
-// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: ret { , , , } [[TMP9]]
+// CPP-CHECK-NEXT: ret { , , , } [[TMP0]]
 //
 svuint16x4_t test_svmax_u16_x4(svuint16x4_t zdn, svuint16x4_t zm) __arm_streaming {
   return SVE_ACLE_FUNC(svmax,_u16_x4)(zdn, zm);
@@ -1344,35 +612,13 @@ svuint16x4_t test_svmax_u16_x4(svuint16x4_t zdn, svuint16x4_t zm) __arm_streamin
 // CHECK-LABEL: @test_svmax_u32_x4(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16
 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.umax.x4.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1
-// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4)
-// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2
-// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8)
-// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
-// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12)
-// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16
-// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
-// CHECK-NEXT: ret { , , , } [[TMP9]]
+// CHECK-NEXT: ret { , , , } [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z17test_svmax_u32_x412svuint32x4_tS_(
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16
 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.umax.x4.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4)
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2
-// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8)
-// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
-// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12)
-// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: ret { , , , } [[TMP9]]
+// CPP-CHECK-NEXT: ret { , , , } [[TMP0]]
 //
 svuint32x4_t test_svmax_u32_x4(svuint32x4_t zdn, svuint32x4_t zm) __arm_streaming {
   return SVE_ACLE_FUNC(svmax,_u32_x4)(zdn, zm);
@@ -1380,35 +626,13 @@ svuint32x4_t test_svmax_u32_x4(svuint32x4_t zdn, svuint32x4_t zm) __arm_streamin
 // CHECK-LABEL: @test_svmax_u64_x4(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16
 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.umax.x4.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1
-// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2)
-// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2
-// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4)
-// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
-// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6)
-// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16
-// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
-// CHECK-NEXT: ret { , , , } [[TMP9]]
+// CHECK-NEXT: ret { , , , } [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z17test_svmax_u64_x412svuint64x4_tS_(
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16
 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.umax.x4.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2)
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2
-// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4)
-// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
-// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6)
-// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: ret { , , , } [[TMP9]]
+// CPP-CHECK-NEXT: ret { , , , } [[TMP0]]
 //
 svuint64x4_t test_svmax_u64_x4(svuint64x4_t zdn, svuint64x4_t zm) __arm_streaming {
   return SVE_ACLE_FUNC(svmax,_u64_x4)(zdn, zm);
@@ -1416,35 +640,13 @@ svuint64x4_t test_svmax_u64_x4(svuint64x4_t zdn, svuint64x4_t zm) __arm_streamin
 // CHECK-LABEL: @test_svmax_bf16_x4(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16
 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmax.x4.nxv8bf16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP1]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1
-// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8)
-// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2
-// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 16)
-// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
-// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 24)
-// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16
-// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
-// CHECK-NEXT: ret { , , , } [[TMP9]]
+// CHECK-NEXT: ret { , , , } [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z18test_svmax_bf16_x414svbfloat16x4_tS_(
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16
 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmax.x4.nxv8bf16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP1]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8)
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2
-// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 16)
-// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
-// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 24)
-// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: ret { , , , } [[TMP9]]
+// CPP-CHECK-NEXT: ret { , , , } [[TMP0]]
 //
 svbfloat16x4_t test_svmax_bf16_x4(svbfloat16x4_t zdn, svbfloat16x4_t zm) __arm_streaming {
   return SVE_ACLE_FUNC(svmax,_bf16_x4)(zdn, zm);
@@ -1452,35 +654,13 @@ svbfloat16x4_t test_svmax_bf16_x4(svbfloat16x4_t zdn, svbfloat16x4_t zm) __arm_s
 // CHECK-LABEL: @test_svmax_f16_x4(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16
 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmax.x4.nxv8f16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1
-// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8)
-// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2
-// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16)
-// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
-// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24)
-// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16
-// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
-// CHECK-NEXT: ret { , , , } [[TMP9]]
+// CHECK-NEXT: ret { , , , } [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z17test_svmax_f16_x413svfloat16x4_tS_(
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16
 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmax.x4.nxv8f16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8)
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2
-// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16)
-// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
-// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24)
-// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: ret { , , , } [[TMP9]]
+// CPP-CHECK-NEXT: ret { , , , } [[TMP0]]
 //
 svfloat16x4_t test_svmax_f16_x4(svfloat16x4_t zdn, svfloat16x4_t zm) __arm_streaming {
   return SVE_ACLE_FUNC(svmax,_f16_x4)(zdn, zm);
@@ -1488,35 +668,13 @@ svfloat16x4_t test_svmax_f16_x4(svfloat16x4_t zdn, svfloat16x4_t zm) __arm_strea
 // CHECK-LABEL: @test_svmax_f32_x4(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16
 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmax.x4.nxv4f32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1
-// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4)
-// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2
-// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8)
-// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
-// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12)
-// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16
-// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
-// CHECK-NEXT: ret { , , , } [[TMP9]]
+// CHECK-NEXT: ret { , , , } [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z17test_svmax_f32_x413svfloat32x4_tS_(
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16
 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmax.x4.nxv4f32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4)
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2
-// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8)
-// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
-// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12)
-// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: ret { , , , } [[TMP9]]
+// CPP-CHECK-NEXT: ret { , , , } [[TMP0]]
 //
 svfloat32x4_t test_svmax_f32_x4(svfloat32x4_t zdn, svfloat32x4_t zm) __arm_streaming {
   return SVE_ACLE_FUNC(svmax,_f32_x4)(zdn, zm);
@@ -1524,35 +682,13 @@ svfloat32x4_t test_svmax_f32_x4(svfloat32x4_t zdn, svfloat32x4_t zm) __arm_strea
 // CHECK-LABEL: @test_svmax_f64_x4(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16
 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmax.x4.nxv2f64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1
-// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2)
-// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2
-// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 4)
-// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
-// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6)
-// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16
-// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
-// CHECK-NEXT: ret { , , , } [[TMP9]]
+// CHECK-NEXT: ret { , , , } [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z17test_svmax_f64_x413svfloat64x4_tS_(
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16
 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmax.x4.nxv2f64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2)
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2
-// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 4)
-// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
-// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6)
-// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: ret { , , , } [[TMP9]]
+// CPP-CHECK-NEXT: ret { , , , } [[TMP0]]
 //
 svfloat64x4_t test_svmax_f64_x4(svfloat64x4_t zdn, svfloat64x4_t zm) __arm_streaming {
   return SVE_ACLE_FUNC(svmax,_f64_x4)(zdn, zm);
diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_maxnm.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_maxnm.c
index 5d06895497cc7..1d47abe8d487c 100644
--- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_maxnm.c
+++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_maxnm.c
@@ -19,27 +19,13 @@
 // CHECK-LABEL: @test_svmaxnm_single_bf16_x2(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmaxnm.single.x2.nxv8bf16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP1]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
-// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8)
-// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
-// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
-// CHECK-NEXT: ret { , } [[TMP5]]
+// CHECK-NEXT: ret { , } [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z27test_svmaxnm_single_bf16_x214svbfloat16x2_tu14__SVBfloat16_t(
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmaxnm.single.x2.nxv8bf16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP1]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8)
-// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: ret { , } [[TMP5]]
+// CPP-CHECK-NEXT: ret { , } [[TMP0]]
 //
 svbfloat16x2_t test_svmaxnm_single_bf16_x2(svbfloat16x2_t zdn, svbfloat16_t zm) __arm_streaming {
   return SVE_ACLE_FUNC(svmaxnm,_single_bf16_x2)(zdn, zm);
@@ -47,27 +33,13 @@ svbfloat16x2_t test_svmaxnm_single_bf16_x2(svbfloat16x2_t zdn, svbfloat16_t zm)
 // CHECK-LABEL: @test_svmaxnm_single_f16_x2(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmaxnm.single.x2.nxv8f16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
-// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8)
-// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
-// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
-// CHECK-NEXT: ret { , } [[TMP5]]
+// CHECK-NEXT: ret { , } [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z26test_svmaxnm_single_f16_x213svfloat16x2_tu13__SVFloat16_t(
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmaxnm.single.x2.nxv8f16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8)
-// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: ret { , } [[TMP5]]
+// CPP-CHECK-NEXT: ret { , } [[TMP0]]
 //
 svfloat16x2_t test_svmaxnm_single_f16_x2(svfloat16x2_t zdn, svfloat16_t zm) __arm_streaming {
   return SVE_ACLE_FUNC(svmaxnm,_single_f16_x2)(zdn, zm);
@@ -75,27 +47,13 @@ svfloat16x2_t test_svmaxnm_single_f16_x2(svfloat16x2_t zdn, svfloat16_t zm) __ar
 // CHECK-LABEL: @test_svmaxnm_single_f32_x2(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmaxnm.single.x2.nxv4f32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
-// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4)
-// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
-// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
-// CHECK-NEXT: ret { , } [[TMP5]]
+// CHECK-NEXT: ret { , } [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z26test_svmaxnm_single_f32_x213svfloat32x2_tu13__SVFloat32_t(
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmaxnm.single.x2.nxv4f32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4)
-// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: ret { , } [[TMP5]]
+// CPP-CHECK-NEXT: ret { , } [[TMP0]]
 //
 svfloat32x2_t test_svmaxnm_single_f32_x2(svfloat32x2_t zdn, svfloat32_t zm) __arm_streaming {
   return SVE_ACLE_FUNC(svmaxnm,_single_f32_x2)(zdn, zm);
@@ -103,27 +61,13 @@ svfloat32x2_t test_svmaxnm_single_f32_x2(svfloat32x2_t zdn, svfloat32_t zm) __ar
 // CHECK-LABEL: @test_svmaxnm_single_f64_x2(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmaxnm.single.x2.nxv2f64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP1]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
-// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2)
-// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
-// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
-// CHECK-NEXT: ret { , } [[TMP5]]
+// CHECK-NEXT: ret { , } [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z26test_svmaxnm_single_f64_x213svfloat64x2_tu13__SVFloat64_t(
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmaxnm.single.x2.nxv2f64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP1]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2)
-// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: ret { , } [[TMP5]]
+// CPP-CHECK-NEXT: ret { , } [[TMP0]]
 //
 svfloat64x2_t test_svmaxnm_single_f64_x2(svfloat64x2_t zdn, svfloat64_t zm) __arm_streaming {
   return SVE_ACLE_FUNC(svmaxnm,_single_f64_x2)(zdn, zm);
@@ -133,35 +77,13 @@ svfloat64x2_t test_svmaxnm_single_f64_x2(svfloat64x2_t zdn, svfloat64_t zm) __ar
 // CHECK-LABEL: @test_svmaxnm_single_bf16_x4(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16
 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmaxnm.single.x4.nxv8bf16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP1]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1
-// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8)
-// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2
-// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 16)
-// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
-// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 24)
-// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16
-// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
-// CHECK-NEXT: ret { , , , } [[TMP9]]
+// CHECK-NEXT: ret { , , , } [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z27test_svmaxnm_single_bf16_x414svbfloat16x4_tu14__SVBfloat16_t(
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16
 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmaxnm.single.x4.nxv8bf16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP1]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8)
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2
-// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 16)
-// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
-// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 24)
-// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: ret { , , , } [[TMP9]]
+// CPP-CHECK-NEXT: ret { , , , } [[TMP0]]
 //
 svbfloat16x4_t test_svmaxnm_single_bf16_x4(svbfloat16x4_t zdn, svbfloat16_t zm) __arm_streaming {
   return SVE_ACLE_FUNC(svmaxnm,_single_bf16_x4)(zdn, zm);
@@ -169,35 +91,13 @@ svbfloat16x4_t test_svmaxnm_single_bf16_x4(svbfloat16x4_t zdn, svbfloat16_t zm)
 // CHECK-LABEL: @test_svmaxnm_single_f16_x4(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16
 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmaxnm.single.x4.nxv8f16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1
-// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8)
-// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2
-// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16)
-// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
-// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24)
-// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16
-// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
-// CHECK-NEXT: ret { , , , } [[TMP9]]
+// CHECK-NEXT: ret { , , , } [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z26test_svmaxnm_single_f16_x413svfloat16x4_tu13__SVFloat16_t(
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16
 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmaxnm.single.x4.nxv8f16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8)
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2
-// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16)
-// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
-// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24)
-// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: ret { , , , } [[TMP9]]
+// CPP-CHECK-NEXT: ret { , , , } [[TMP0]]
 //
 svfloat16x4_t test_svmaxnm_single_f16_x4(svfloat16x4_t zdn, svfloat16_t zm) __arm_streaming {
   return SVE_ACLE_FUNC(svmaxnm,_single_f16_x4)(zdn, zm);
@@ -205,35 +105,13 @@ svfloat16x4_t test_svmaxnm_single_f16_x4(svfloat16x4_t zdn, svfloat16_t zm) __ar
 // CHECK-LABEL: @test_svmaxnm_single_f32_x4(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16
 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmaxnm.single.x4.nxv4f32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1
-// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4)
-// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2
-// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8)
-// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
-// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12)
-// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16
-// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
-// CHECK-NEXT: ret { , , , } [[TMP9]]
+// CHECK-NEXT: ret { , , , } [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z26test_svmaxnm_single_f32_x413svfloat32x4_tu13__SVFloat32_t(
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16
 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmaxnm.single.x4.nxv4f32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4)
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2
-// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8)
-// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
-// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12)
-// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: ret { , , , } [[TMP9]]
+// CPP-CHECK-NEXT: ret { , , , } [[TMP0]]
 //
 svfloat32x4_t test_svmaxnm_single_f32_x4(svfloat32x4_t zdn, svfloat32_t zm) __arm_streaming {
   return SVE_ACLE_FUNC(svmaxnm,_single_f32_x4)(zdn, zm);
@@ -241,35 +119,13 @@ svfloat32x4_t test_svmaxnm_single_f32_x4(svfloat32x4_t zdn, svfloat32_t zm) __ar
 // CHECK-LABEL: @test_svmaxnm_single_f64_x4(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16
 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmaxnm.single.x4.nxv2f64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1
-// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2)
-// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2
-// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 4)
-// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
-// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6)
-// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16
-// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
-// CHECK-NEXT: ret { , , , } [[TMP9]]
+// CHECK-NEXT: ret { , , , } [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z26test_svmaxnm_single_f64_x413svfloat64x4_tu13__SVFloat64_t(
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16
 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmaxnm.single.x4.nxv2f64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2)
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2
-// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 4)
-// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
-// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6)
-// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: ret { , , , } [[TMP9]]
+// CPP-CHECK-NEXT: ret { , , , } [[TMP0]]
 //
 svfloat64x4_t test_svmaxnm_single_f64_x4(svfloat64x4_t zdn, svfloat64_t zm) __arm_streaming {
   return SVE_ACLE_FUNC(svmaxnm,_single_f64_x4)(zdn, zm);
@@ -279,27 +135,13 @@ svfloat64x4_t test_svmaxnm_single_f64_x4(svfloat64x4_t zdn, svfloat64_t zm) __ar
 // CHECK-LABEL: @test_svmaxnm_multi_bf16_x2(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmaxnm.x2.nxv8bf16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP1]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
-// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8)
-// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
-// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
-// CHECK-NEXT: ret { , } [[TMP5]]
+// CHECK-NEXT: ret { , } [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z26test_svmaxnm_multi_bf16_x214svbfloat16x2_tS_(
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmaxnm.x2.nxv8bf16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP1]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8)
-// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: ret { , } [[TMP5]]
+// CPP-CHECK-NEXT: ret { , } [[TMP0]]
 //
 svbfloat16x2_t test_svmaxnm_multi_bf16_x2(svbfloat16x2_t zdn, svbfloat16x2_t zm) __arm_streaming {
   return SVE_ACLE_FUNC(svmaxnm,_bf16_x2)(zdn, zm);
@@ -307,27 +149,13 @@ svbfloat16x2_t test_svmaxnm_multi_bf16_x2(svbfloat16x2_t zdn, svbfloat16x2_t zm)
 // CHECK-LABEL: @test_svmaxnm_multi_f16_x2(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmaxnm.x2.nxv8f16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
-// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8)
-// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
-// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
-// CHECK-NEXT: ret { , } [[TMP5]]
+// CHECK-NEXT: ret { , } [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z25test_svmaxnm_multi_f16_x213svfloat16x2_tS_(
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmaxnm.x2.nxv8f16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8)
-// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: ret { , } [[TMP5]]
+// CPP-CHECK-NEXT: ret { , } [[TMP0]]
 //
 svfloat16x2_t test_svmaxnm_multi_f16_x2(svfloat16x2_t zdn, svfloat16x2_t zm) __arm_streaming {
   return SVE_ACLE_FUNC(svmaxnm,_f16_x2)(zdn, zm);
@@ -335,27 +163,13 @@ svfloat16x2_t test_svmaxnm_multi_f16_x2(svfloat16x2_t zdn, svfloat16x2_t zm) __a
 // CHECK-LABEL: @test_svmaxnm_multi_f32_x2(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmaxnm.x2.nxv4f32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
-// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4)
-// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
-// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
-// CHECK-NEXT: ret { , } [[TMP5]]
+// CHECK-NEXT: ret { , } [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z25test_svmaxnm_multi_f32_x213svfloat32x2_tS_(
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmaxnm.x2.nxv4f32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4)
-// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: ret { , } [[TMP5]]
+// CPP-CHECK-NEXT: ret { , } [[TMP0]]
 //
 svfloat32x2_t test_svmaxnm_multi_f32_x2(svfloat32x2_t zdn, svfloat32x2_t zm) __arm_streaming {
   return SVE_ACLE_FUNC(svmaxnm,_f32_x2)(zdn, zm);
@@ -363,27 +177,13 @@ svfloat32x2_t test_svmaxnm_multi_f32_x2(svfloat32x2_t zdn, svfloat32x2_t zm) __a
 // CHECK-LABEL: @test_svmaxnm_multi_f64_x2(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmaxnm.x2.nxv2f64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP1]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
-// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2)
-// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
-// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
-// CHECK-NEXT: ret { , } [[TMP5]]
+// CHECK-NEXT: ret { , } [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z25test_svmaxnm_multi_f64_x213svfloat64x2_tS_(
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmaxnm.x2.nxv2f64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP1]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2)
-// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: ret { , } [[TMP5]]
+// CPP-CHECK-NEXT: ret { , } [[TMP0]]
 //
 svfloat64x2_t test_svmaxnm_multi_f64_x2(svfloat64x2_t zdn, svfloat64x2_t zm) __arm_streaming {
   return SVE_ACLE_FUNC(svmaxnm,_f64_x2)(zdn, zm);
@@ -393,35 +193,13 @@ svfloat64x2_t test_svmaxnm_multi_f64_x2(svfloat64x2_t zdn, svfloat64x2_t zm) __a
 // CHECK-LABEL: @test_svmaxnm_multi_bf16_x4(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16
 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmaxnm.x4.nxv8bf16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP1]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1
-// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8)
-// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2
-// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 16)
-// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
-// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 24)
-// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16
-// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
-// CHECK-NEXT: ret { , , , } [[TMP9]]
+// CHECK-NEXT: ret { , , , } [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z26test_svmaxnm_multi_bf16_x414svbfloat16x4_tS_(
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16
 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmaxnm.x4.nxv8bf16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP1]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8)
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2
-// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 16)
-// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
-// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 24)
-// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: ret { , , , } [[TMP9]]
+// CPP-CHECK-NEXT: ret { , , , } [[TMP0]]
 //
 svbfloat16x4_t test_svmaxnm_multi_bf16_x4(svbfloat16x4_t zdn, svbfloat16x4_t zm) __arm_streaming {
   return SVE_ACLE_FUNC(svmaxnm,_bf16_x4)(zdn, zm);
@@ -429,35 +207,13 @@ svbfloat16x4_t test_svmaxnm_multi_bf16_x4(svbfloat16x4_t zdn, svbfloat16x4_t zm)
 // CHECK-LABEL: @test_svmaxnm_multi_f16_x4(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16
 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmaxnm.x4.nxv8f16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1
-// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8)
-// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2
-// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16)
-// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
-// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24)
-// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16
-// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
-// CHECK-NEXT: ret { , , , } [[TMP9]]
+// CHECK-NEXT: ret { , , , } [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z25test_svmaxnm_multi_f16_x413svfloat16x4_tS_(
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16
 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmaxnm.x4.nxv8f16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8)
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2
-// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16)
-// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
-// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24)
-// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: ret { , , , } [[TMP9]]
+// CPP-CHECK-NEXT: ret { , , , } [[TMP0]]
 //
 svfloat16x4_t test_svmaxnm_multi_f16_x4(svfloat16x4_t zdn, svfloat16x4_t zm) __arm_streaming {
   return SVE_ACLE_FUNC(svmaxnm,_f16_x4)(zdn, zm);
@@ -465,35 +221,13 @@ svfloat16x4_t test_svmaxnm_multi_f16_x4(svfloat16x4_t zdn, svfloat16x4_t zm) __a
 // CHECK-LABEL: @test_svmaxnm_multi_f32_x4(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16
 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmaxnm.x4.nxv4f32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1
-// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4)
-// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2
-// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8)
-// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
-// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12)
-// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16
-// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
-// CHECK-NEXT: ret { , , , } [[TMP9]]
+// CHECK-NEXT: ret { , , , } [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z25test_svmaxnm_multi_f32_x413svfloat32x4_tS_(
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16
 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmaxnm.x4.nxv4f32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4)
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2
-// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8)
-// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
-// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12)
-// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: ret { , , , } [[TMP9]]
+// CPP-CHECK-NEXT: ret { , , , } [[TMP0]]
 //
 svfloat32x4_t test_svmaxnm_multi_f32_x4(svfloat32x4_t zdn, svfloat32x4_t zm) __arm_streaming {
   return SVE_ACLE_FUNC(svmaxnm,_f32_x4)(zdn, zm);
@@ -501,35 +235,13 @@ svfloat32x4_t test_svmaxnm_multi_f32_x4(svfloat32x4_t zdn, svfloat32x4_t zm) __a
 // CHECK-LABEL: @test_svmaxnm_multi_f64_x4(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16
 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmaxnm.x4.nxv2f64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1
-// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2)
-// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2
-// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 4)
-// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
-// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6)
-// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16
-// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
-// CHECK-NEXT: ret { , , , } [[TMP9]]
+// CHECK-NEXT: ret { , , , } [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z25test_svmaxnm_multi_f64_x413svfloat64x4_tS_(
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16
 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmaxnm.x4.nxv2f64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2)
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2
-// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 4)
-// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
-// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6)
-// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: ret { , , , } [[TMP9]]
+// CPP-CHECK-NEXT: ret { , , , } [[TMP0]]
 //
 svfloat64x4_t test_svmaxnm_multi_f64_x4(svfloat64x4_t zdn, svfloat64x4_t zm) __arm_streaming {
   return SVE_ACLE_FUNC(svmaxnm,_f64_x4)(zdn, zm);
diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_min.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_min.c
index 2fa7feeee404e..4e70a39311664 100644
--- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_min.c
+++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_min.c
@@ -18,27 +18,13 @@
 // CHECK-LABEL: @test_svmin_single_s8_x2(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.smin.single.x2.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
-// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16)
-// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
-// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
-// CHECK-NEXT: ret { , } [[TMP5]]
+// CHECK-NEXT: ret { , } [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z23test_svmin_single_s8_x210svint8x2_tu10__SVInt8_t(
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.smin.single.x2.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16)
-// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: ret { , } [[TMP5]]
+// CPP-CHECK-NEXT: ret { , } [[TMP0]]
 //
 svint8x2_t test_svmin_single_s8_x2(svint8x2_t zdn, svint8_t zm) __arm_streaming {
   return SVE_ACLE_FUNC(svmin,_single_s8_x2)(zdn, zm);
@@ -46,27 +32,13 @@ svint8x2_t test_svmin_single_s8_x2(svint8x2_t zdn, svint8_t zm) __arm_streaming
 // CHECK-LABEL: @test_svmin_single_s16_x2(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.smin.single.x2.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
-// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8)
-// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
-// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
-// CHECK-NEXT: ret { , } [[TMP5]]
+// CHECK-NEXT: ret { , } [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z24test_svmin_single_s16_x211svint16x2_tu11__SVInt16_t(
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.smin.single.x2.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8)
-// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: ret { , } [[TMP5]]
+// CPP-CHECK-NEXT: ret { , } [[TMP0]]
 //
 svint16x2_t test_svmin_single_s16_x2(svint16x2_t zdn, svint16_t zm) __arm_streaming {
   return SVE_ACLE_FUNC(svmin,_single_s16_x2)(zdn, zm);
@@ -74,27 +46,13 @@ svint16x2_t test_svmin_single_s16_x2(svint16x2_t zdn, svint16_t zm) __arm_stream
 // CHECK-LABEL: @test_svmin_single_s32_x2(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.smin.single.x2.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
-// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4)
-// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
-// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
-// CHECK-NEXT: ret { , } [[TMP5]]
+// CHECK-NEXT: ret { , } [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z24test_svmin_single_s32_x211svint32x2_tu11__SVInt32_t(
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.smin.single.x2.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4)
-// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: ret { , } [[TMP5]]
+// CPP-CHECK-NEXT: ret { , } [[TMP0]]
 //
 svint32x2_t test_svmin_single_s32_x2(svint32x2_t zdn, svint32_t zm) __arm_streaming {
   return SVE_ACLE_FUNC(svmin,_single_s32_x2)(zdn, zm);
@@ -102,27 +60,13 @@ svint32x2_t test_svmin_single_s32_x2(svint32x2_t zdn, svint32_t zm) __arm_stream
 // CHECK-LABEL: 
@test_svmin_single_s64_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.smin.single.x2.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svmin_single_s64_x211svint64x2_tu11__SVInt64_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.smin.single.x2.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint64x2_t test_svmin_single_s64_x2(svint64x2_t zdn, svint64_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmin,_single_s64_x2)(zdn, zm); @@ -130,27 +74,13 @@ svint64x2_t test_svmin_single_s64_x2(svint64x2_t zdn, svint64_t zm) __arm_stream // CHECK-LABEL: @test_svmin_single_u8_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.umin.single.x2.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z23test_svmin_single_u8_x211svuint8x2_tu11__SVUint8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.umin.single.x2.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // 
svuint8x2_t test_svmin_single_u8_x2(svuint8x2_t zdn, svuint8_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmin,_single_u8_x2)(zdn, zm); @@ -158,27 +88,13 @@ svuint8x2_t test_svmin_single_u8_x2(svuint8x2_t zdn, svuint8_t zm) __arm_streami // CHECK-LABEL: @test_svmin_single_u16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.umin.single.x2.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svmin_single_u16_x212svuint16x2_tu12__SVUint16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.umin.single.x2.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint16x2_t test_svmin_single_u16_x2(svuint16x2_t zdn, svuint16_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmin,_single_u16_x2)(zdn, zm); @@ -186,27 +102,13 @@ svuint16x2_t test_svmin_single_u16_x2(svuint16x2_t zdn, svuint16_t zm) __arm_str // CHECK-LABEL: @test_svmin_single_u32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.umin.single.x2.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svmin_single_u32_x212svuint32x2_tu12__SVUint32_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.umin.single.x2.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call 
@llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint32x2_t test_svmin_single_u32_x2(svuint32x2_t zdn, svuint32_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmin,_single_u32_x2)(zdn, zm); @@ -214,27 +116,13 @@ svuint32x2_t test_svmin_single_u32_x2(svuint32x2_t zdn, svuint32_t zm) __arm_str // CHECK-LABEL: @test_svmin_single_u64_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.umin.single.x2.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svmin_single_u64_x212svuint64x2_tu12__SVUint64_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.umin.single.x2.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint64x2_t test_svmin_single_u64_x2(svuint64x2_t zdn, svuint64_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmin,_single_u64_x2)(zdn, zm); @@ -242,27 +130,13 @@ svuint64x2_t test_svmin_single_u64_x2(svuint64x2_t zdn, svuint64_t zm) __arm_str // CHECK-LABEL: @test_svmin_single_bf16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmin.single.x2.nxv8bf16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z25test_svmin_single_bf16_x214svbfloat16x2_tu14__SVBfloat16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmin.single.x2.nxv8bf16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) 
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svbfloat16x2_t test_svmin_single_bf16_x2(svbfloat16x2_t zdn, svbfloat16_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmin,_single_bf16_x2)(zdn, zm); @@ -270,27 +144,13 @@ svbfloat16x2_t test_svmin_single_bf16_x2(svbfloat16x2_t zdn, svbfloat16_t zm) __ // CHECK-LABEL: @test_svmin_single_f16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmin.single.x2.nxv8f16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svmin_single_f16_x213svfloat16x2_tu13__SVFloat16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmin.single.x2.nxv8f16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat16x2_t test_svmin_single_f16_x2(svfloat16x2_t zdn, svfloat16_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmin,_single_f16_x2)(zdn, zm); @@ -298,27 +158,13 @@ svfloat16x2_t test_svmin_single_f16_x2(svfloat16x2_t zdn, svfloat16_t zm) __arm_ // CHECK-LABEL: @test_svmin_single_f32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmin.single.x2.nxv4f32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: 
@_Z24test_svmin_single_f32_x213svfloat32x2_tu13__SVFloat32_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmin.single.x2.nxv4f32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat32x2_t test_svmin_single_f32_x2(svfloat32x2_t zdn, svfloat32_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmin,_single_f32_x2)(zdn, zm); @@ -326,27 +172,13 @@ svfloat32x2_t test_svmin_single_f32_x2(svfloat32x2_t zdn, svfloat32_t zm) __arm_ // CHECK-LABEL: @test_svmin_single_f64_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmin.single.x2.nxv2f64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svmin_single_f64_x213svfloat64x2_tu13__SVFloat64_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmin.single.x2.nxv2f64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat64x2_t test_svmin_single_f64_x2(svfloat64x2_t zdn, svfloat64_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmin,_single_f64_x2)(zdn, zm); @@ -356,35 +188,13 @@ svfloat64x2_t test_svmin_single_f64_x2(svfloat64x2_t zdn, svfloat64_t zm) __arm_ // CHECK-LABEL: @test_svmin_single_s8_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.smin.single.x4.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , 
} [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z23test_svmin_single_s8_x410svint8x4_tu10__SVInt8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.smin.single.x4.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint8x4_t test_svmin_single_s8_x4(svint8x4_t zdn, svint8_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmin,_single_s8_x4)(zdn, zm); @@ -392,35 +202,13 @@ svint8x4_t test_svmin_single_s8_x4(svint8x4_t zdn, svint8_t zm) __arm_streaming // CHECK-LABEL: @test_svmin_single_s16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.smin.single.x4.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svmin_single_s16_x411svint16x4_tu11__SVInt16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: 
[[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.smin.single.x4.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint16x4_t test_svmin_single_s16_x4(svint16x4_t zdn, svint16_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmin,_single_s16_x4)(zdn, zm); @@ -428,35 +216,13 @@ svint16x4_t test_svmin_single_s16_x4(svint16x4_t zdn, svint16_t zm) __arm_stream // CHECK-LABEL: @test_svmin_single_s32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.smin.single.x4.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svmin_single_s32_x411svint32x4_tu11__SVInt32_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.smin.single.x4.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: 
[[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint32x4_t test_svmin_single_s32_x4(svint32x4_t zdn, svint32_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmin,_single_s32_x4)(zdn, zm); @@ -464,35 +230,13 @@ svint32x4_t test_svmin_single_s32_x4(svint32x4_t zdn, svint32_t zm) __arm_stream // CHECK-LABEL: @test_svmin_single_s64_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.smin.single.x4.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svmin_single_s64_x411svint64x4_tu11__SVInt64_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.smin.single.x4.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint64x4_t test_svmin_single_s64_x4(svint64x4_t zdn, svint64_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmin,_single_s64_x4)(zdn, zm); @@ -500,35 +244,13 @@ svint64x4_t test_svmin_single_s64_x4(svint64x4_t zdn, svint64_t zm) __arm_stream // CHECK-LABEL: @test_svmin_single_u8_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } 
@llvm.aarch64.sve.umin.single.x4.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z23test_svmin_single_u8_x411svuint8x4_tu11__SVUint8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.umin.single.x4.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint8x4_t test_svmin_single_u8_x4(svuint8x4_t zdn, svuint8_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmin,_single_u8_x4)(zdn, zm); @@ -536,35 +258,13 @@ svuint8x4_t test_svmin_single_u8_x4(svuint8x4_t zdn, svuint8_t zm) __arm_streami // CHECK-LABEL: @test_svmin_single_u16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.umin.single.x4.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( 
[[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svmin_single_u16_x412svuint16x4_tu12__SVUint16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.umin.single.x4.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint16x4_t test_svmin_single_u16_x4(svuint16x4_t zdn, svuint16_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmin,_single_u16_x4)(zdn, zm); @@ -572,35 +272,13 @@ svuint16x4_t test_svmin_single_u16_x4(svuint16x4_t zdn, svuint16_t zm) __arm_str // CHECK-LABEL: @test_svmin_single_u32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.umin.single.x4.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svmin_single_u32_x412svuint32x4_tu12__SVUint32_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.umin.single.x4.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// 
CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint32x4_t test_svmin_single_u32_x4(svuint32x4_t zdn, svuint32_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmin,_single_u32_x4)(zdn, zm); @@ -608,35 +286,13 @@ svuint32x4_t test_svmin_single_u32_x4(svuint32x4_t zdn, svuint32_t zm) __arm_str // CHECK-LABEL: @test_svmin_single_u64_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.umin.single.x4.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svmin_single_u64_x412svuint64x4_tu12__SVUint64_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.umin.single.x4.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint64x4_t 
test_svmin_single_u64_x4(svuint64x4_t zdn, svuint64_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmin,_single_u64_x4)(zdn, zm); @@ -644,35 +300,13 @@ svuint64x4_t test_svmin_single_u64_x4(svuint64x4_t zdn, svuint64_t zm) __arm_str // CHECK-LABEL: @test_svmin_single_bf16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmin.single.x4.nxv8bf16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z25test_svmin_single_bf16_x414svbfloat16x4_tu14__SVBfloat16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmin.single.x4.nxv8bf16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svbfloat16x4_t test_svmin_single_bf16_x4(svbfloat16x4_t zdn, svbfloat16_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmin,_single_bf16_x4)(zdn, zm); @@ -680,35 +314,13 @@ svbfloat16x4_t test_svmin_single_bf16_x4(svbfloat16x4_t zdn, svbfloat16_t zm) __ // CHECK-LABEL: @test_svmin_single_f16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmin.single.x4.nxv8f16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue 
{ , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svmin_single_f16_x413svfloat16x4_tu13__SVFloat16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmin.single.x4.nxv8f16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat16x4_t test_svmin_single_f16_x4(svfloat16x4_t zdn, svfloat16_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmin,_single_f16_x4)(zdn, zm); @@ -716,35 +328,13 @@ svfloat16x4_t test_svmin_single_f16_x4(svfloat16x4_t zdn, svfloat16_t zm) __arm_ // CHECK-LABEL: @test_svmin_single_f32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmin.single.x4.nxv4f32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svmin_single_f32_x413svfloat32x4_tu13__SVFloat32_t( // CPP-CHECK-NEXT: 
entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmin.single.x4.nxv4f32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat32x4_t test_svmin_single_f32_x4(svfloat32x4_t zdn, svfloat32_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmin,_single_f32_x4)(zdn, zm); @@ -752,35 +342,13 @@ svfloat32x4_t test_svmin_single_f32_x4(svfloat32x4_t zdn, svfloat32_t zm) __arm_ // CHECK-LABEL: @test_svmin_single_f64_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmin.single.x4.nxv2f64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svmin_single_f64_x413svfloat64x4_tu13__SVFloat64_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmin.single.x4.nxv2f64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], 
i64 4)
-// CPP-CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
-// CPP-CHECK-NEXT:    [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6)
-// CPP-CHECK-NEXT:    store [[TMP8]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT:    [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT:    ret { , , , } [[TMP9]]
+// CPP-CHECK-NEXT:    ret { , , , } [[TMP0]]
 //
 svfloat64x4_t test_svmin_single_f64_x4(svfloat64x4_t zdn, svfloat64_t zm) __arm_streaming {
   return SVE_ACLE_FUNC(svmin,_single_f64_x4)(zdn, zm);
@@ -790,27 +358,13 @@ svfloat64x4_t test_svmin_single_f64_x4(svfloat64x4_t zdn, svfloat64_t zm) __arm_
 // CHECK-LABEL: @test_svmin_s8_x2(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[RETVAL:%.*]] = alloca { , }, align 16
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.smin.x2.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0)
-// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16)
-// CHECK-NEXT:    store [[TMP4]], ptr [[RETVAL]], align 16
-// CHECK-NEXT:    [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
-// CHECK-NEXT:    ret { , } [[TMP5]]
+// CHECK-NEXT:    ret { , } [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z16test_svmin_s8_x210svint8x2_tS_(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[RETVAL:%.*]] = alloca { , }, align 16
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.smin.x2.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16)
-// CPP-CHECK-NEXT:    store [[TMP4]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT:    [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT:    ret { , } [[TMP5]]
+// CPP-CHECK-NEXT:    ret { , } [[TMP0]]
 //
 svint8x2_t test_svmin_s8_x2(svint8x2_t zdn, svint8x2_t zm) __arm_streaming {
   return SVE_ACLE_FUNC(svmin,_s8_x2)(zdn, zm);
@@ -818,27 +372,13 @@ svint8x2_t test_svmin_s8_x2(svint8x2_t zdn, svint8x2_t zm) __arm_streaming {
 // CHECK-LABEL: @test_svmin_s16_x2(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[RETVAL:%.*]] = alloca { , }, align 16
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.smin.x2.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0)
-// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8)
-// CHECK-NEXT:    store [[TMP4]], ptr [[RETVAL]], align 16
-// CHECK-NEXT:    [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
-// CHECK-NEXT:    ret { , } [[TMP5]]
+// CHECK-NEXT:    ret { , } [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z17test_svmin_s16_x211svint16x2_tS_(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[RETVAL:%.*]] = alloca { , }, align 16
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.smin.x2.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8)
-// CPP-CHECK-NEXT:    store [[TMP4]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT:    [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT:    ret { , } [[TMP5]]
+// CPP-CHECK-NEXT:    ret { , } [[TMP0]]
 //
 svint16x2_t test_svmin_s16_x2(svint16x2_t zdn, svint16x2_t zm) __arm_streaming {
   return SVE_ACLE_FUNC(svmin,_s16_x2)(zdn, zm);
@@ -846,27 +386,13 @@ svint16x2_t test_svmin_s16_x2(svint16x2_t zdn, svint16x2_t zm) __arm_streaming {
 // CHECK-LABEL: @test_svmin_s32_x2(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[RETVAL:%.*]] = alloca { , }, align 16
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.smin.x2.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0)
-// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4)
-// CHECK-NEXT:    store [[TMP4]], ptr [[RETVAL]], align 16
-// CHECK-NEXT:    [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
-// CHECK-NEXT:    ret { , } [[TMP5]]
+// CHECK-NEXT:    ret { , } [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z17test_svmin_s32_x211svint32x2_tS_(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[RETVAL:%.*]] = alloca { , }, align 16
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.smin.x2.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4)
-// CPP-CHECK-NEXT:    store [[TMP4]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT:    [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT:    ret { , } [[TMP5]]
+// CPP-CHECK-NEXT:    ret { , } [[TMP0]]
 //
 svint32x2_t test_svmin_s32_x2(svint32x2_t zdn, svint32x2_t zm) __arm_streaming {
   return SVE_ACLE_FUNC(svmin,_s32_x2)(zdn, zm);
@@ -874,27 +400,13 @@ svint32x2_t test_svmin_s32_x2(svint32x2_t zdn, svint32x2_t zm) __arm_streaming {
 // CHECK-LABEL: @test_svmin_s64_x2(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[RETVAL:%.*]] = alloca { , }, align 16
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.smin.x2.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0)
-// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2)
-// CHECK-NEXT:    store [[TMP4]], ptr [[RETVAL]], align 16
-// CHECK-NEXT:    [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
-// CHECK-NEXT:    ret { , } [[TMP5]]
+// CHECK-NEXT:    ret { , } [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z17test_svmin_s64_x211svint64x2_tS_(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[RETVAL:%.*]] = alloca { , }, align 16
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.smin.x2.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2)
-// CPP-CHECK-NEXT:    store [[TMP4]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT:    [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT:    ret { , } [[TMP5]]
+// CPP-CHECK-NEXT:    ret { , } [[TMP0]]
 //
 svint64x2_t test_svmin_s64_x2(svint64x2_t zdn, svint64x2_t zm) __arm_streaming {
   return SVE_ACLE_FUNC(svmin,_s64_x2)(zdn, zm);
@@ -902,27 +414,13 @@ svint64x2_t test_svmin_s64_x2(svint64x2_t zdn, svint64x2_t zm) __arm_streaming {
 // CHECK-LABEL: @test_svmin_u8_x2(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[RETVAL:%.*]] = alloca { , }, align 16
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.umin.x2.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0)
-// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16)
-// CHECK-NEXT:    store [[TMP4]], ptr [[RETVAL]], align 16
-// CHECK-NEXT:    [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
-// CHECK-NEXT:    ret { , } [[TMP5]]
+// CHECK-NEXT:    ret { , } [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z16test_svmin_u8_x211svuint8x2_tS_(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[RETVAL:%.*]] = alloca { , }, align 16
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.umin.x2.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16)
-// CPP-CHECK-NEXT:    store [[TMP4]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT:    [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT:    ret { , } [[TMP5]]
+// CPP-CHECK-NEXT:    ret { , } [[TMP0]]
 //
 svuint8x2_t test_svmin_u8_x2(svuint8x2_t zdn, svuint8x2_t zm) __arm_streaming {
   return SVE_ACLE_FUNC(svmin,_u8_x2)(zdn, zm);
@@ -930,27 +428,13 @@ svuint8x2_t test_svmin_u8_x2(svuint8x2_t zdn, svuint8x2_t zm) __arm_streaming {
 // CHECK-LABEL: @test_svmin_u16_x2(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[RETVAL:%.*]] = alloca { , }, align 16
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.umin.x2.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0)
-// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8)
-// CHECK-NEXT:    store [[TMP4]], ptr [[RETVAL]], align 16
-// CHECK-NEXT:    [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
-// CHECK-NEXT:    ret { , } [[TMP5]]
+// CHECK-NEXT:    ret { , } [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z17test_svmin_u16_x212svuint16x2_tS_(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[RETVAL:%.*]] = alloca { , }, align 16
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.umin.x2.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8)
-// CPP-CHECK-NEXT:    store [[TMP4]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT:    [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT:    ret { , } [[TMP5]]
+// CPP-CHECK-NEXT:    ret { , } [[TMP0]]
 //
 svuint16x2_t test_svmin_u16_x2(svuint16x2_t zdn, svuint16x2_t zm) __arm_streaming {
   return SVE_ACLE_FUNC(svmin,_u16_x2)(zdn, zm);
@@ -958,27 +442,13 @@ svuint16x2_t test_svmin_u16_x2(svuint16x2_t zdn, svuint16x2_t zm) __arm_streamin
 // CHECK-LABEL: @test_svmin_u32_x2(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[RETVAL:%.*]] = alloca { , }, align 16
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.umin.x2.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0)
-// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4)
-// CHECK-NEXT:    store [[TMP4]], ptr [[RETVAL]], align 16
-// CHECK-NEXT:    [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
-// CHECK-NEXT:    ret { , } [[TMP5]]
+// CHECK-NEXT:    ret { , } [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z17test_svmin_u32_x212svuint32x2_tS_(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[RETVAL:%.*]] = alloca { , }, align 16
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.umin.x2.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4)
-// CPP-CHECK-NEXT:    store [[TMP4]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT:    [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT:    ret { , } [[TMP5]]
+// CPP-CHECK-NEXT:    ret { , } [[TMP0]]
 //
 svuint32x2_t test_svmin_u32_x2(svuint32x2_t zdn, svuint32x2_t zm) __arm_streaming {
   return SVE_ACLE_FUNC(svmin,_u32_x2)(zdn, zm);
@@ -986,27 +456,13 @@ svuint32x2_t test_svmin_u32_x2(svuint32x2_t zdn, svuint32x2_t zm) __arm_streamin
 // CHECK-LABEL: @test_svmin_u64_x2(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[RETVAL:%.*]] = alloca { , }, align 16
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.umin.x2.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0)
-// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2)
-// CHECK-NEXT:    store [[TMP4]], ptr [[RETVAL]], align 16
-// CHECK-NEXT:    [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
-// CHECK-NEXT:    ret { , } [[TMP5]]
+// CHECK-NEXT:    ret { , } [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z17test_svmin_u64_x212svuint64x2_tS_(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[RETVAL:%.*]] = alloca { , }, align 16
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.umin.x2.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2)
-// CPP-CHECK-NEXT:    store [[TMP4]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT:    [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT:    ret { , } [[TMP5]]
+// CPP-CHECK-NEXT:    ret { , } [[TMP0]]
 //
 svuint64x2_t test_svmin_u64_x2(svuint64x2_t zdn, svuint64x2_t zm) __arm_streaming {
   return SVE_ACLE_FUNC(svmin,_u64_x2)(zdn, zm);
@@ -1014,27 +470,13 @@ svuint64x2_t test_svmin_u64_x2(svuint64x2_t zdn, svuint64x2_t zm) __arm_streamin
 // CHECK-LABEL: @test_svmin_bf16_x2(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[RETVAL:%.*]] = alloca { , }, align 16
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmin.x2.nxv8bf16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP1]], i64 0)
-// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8)
-// CHECK-NEXT:    store [[TMP4]], ptr [[RETVAL]], align 16
-// CHECK-NEXT:    [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
-// CHECK-NEXT:    ret { , } [[TMP5]]
+// CHECK-NEXT:    ret { , } [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z18test_svmin_bf16_x214svbfloat16x2_tS_(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[RETVAL:%.*]] = alloca { , }, align 16
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmin.x2.nxv8bf16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP1]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8)
-// CPP-CHECK-NEXT:    store [[TMP4]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT:    [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT:    ret { , } [[TMP5]]
+// CPP-CHECK-NEXT:    ret { , } [[TMP0]]
 //
 svbfloat16x2_t test_svmin_bf16_x2(svbfloat16x2_t zdn, svbfloat16x2_t zm) __arm_streaming {
   return SVE_ACLE_FUNC(svmin,_bf16_x2)(zdn, zm);
@@ -1042,27 +484,13 @@ svbfloat16x2_t test_svmin_bf16_x2(svbfloat16x2_t zdn, svbfloat16x2_t zm) __arm_s
 // CHECK-LABEL: @test_svmin_f16_x2(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[RETVAL:%.*]] = alloca { , }, align 16
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmin.x2.nxv8f16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0)
-// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8)
-// CHECK-NEXT:    store [[TMP4]], ptr [[RETVAL]], align 16
-// CHECK-NEXT:    [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
-// CHECK-NEXT:    ret { , } [[TMP5]]
+// CHECK-NEXT:    ret { , } [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z17test_svmin_f16_x213svfloat16x2_tS_(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[RETVAL:%.*]] = alloca { , }, align 16
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmin.x2.nxv8f16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8)
-// CPP-CHECK-NEXT:    store [[TMP4]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT:    [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT:    ret { , } [[TMP5]]
+// CPP-CHECK-NEXT:    ret { , } [[TMP0]]
 //
 svfloat16x2_t test_svmin_f16_x2(svfloat16x2_t zdn, svfloat16x2_t zm) __arm_streaming {
   return SVE_ACLE_FUNC(svmin,_f16_x2)(zdn, zm);
@@ -1070,27 +498,13 @@ svfloat16x2_t test_svmin_f16_x2(svfloat16x2_t zdn, svfloat16x2_t zm) __arm_strea
 // CHECK-LABEL: @test_svmin_f32_x2(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[RETVAL:%.*]] = alloca { , }, align 16
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmin.x2.nxv4f32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0)
-// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4)
-// CHECK-NEXT:    store [[TMP4]], ptr [[RETVAL]], align 16
-// CHECK-NEXT:    [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
-// CHECK-NEXT:    ret { , } [[TMP5]]
+// CHECK-NEXT:    ret { , } [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z17test_svmin_f32_x213svfloat32x2_tS_(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[RETVAL:%.*]] = alloca { , }, align 16
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmin.x2.nxv4f32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4)
-// CPP-CHECK-NEXT:    store [[TMP4]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT:    [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT:    ret { , } [[TMP5]]
+// CPP-CHECK-NEXT:    ret { , } [[TMP0]]
 //
 svfloat32x2_t test_svmin_f32_x2(svfloat32x2_t zdn, svfloat32x2_t zm) __arm_streaming {
   return SVE_ACLE_FUNC(svmin,_f32_x2)(zdn, zm);
@@ -1098,27 +512,13 @@ svfloat32x2_t test_svmin_f32_x2(svfloat32x2_t zdn, svfloat32x2_t zm) __arm_strea
 // CHECK-LABEL: @test_svmin_f64_x2(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[RETVAL:%.*]] = alloca { , }, align 16
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmin.x2.nxv2f64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP1]], i64 0)
-// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2)
-// CHECK-NEXT:    store [[TMP4]], ptr [[RETVAL]], align 16
-// CHECK-NEXT:    [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
-// CHECK-NEXT:    ret { , } [[TMP5]]
+// CHECK-NEXT:    ret { , } [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z17test_svmin_f64_x213svfloat64x2_tS_(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[RETVAL:%.*]] = alloca { , }, align 16
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmin.x2.nxv2f64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP1]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2)
-// CPP-CHECK-NEXT:    store [[TMP4]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT:    [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT:    ret { , } [[TMP5]]
+// CPP-CHECK-NEXT:    ret { , } [[TMP0]]
 //
 svfloat64x2_t test_svmin_f64_x2(svfloat64x2_t zdn, svfloat64x2_t zm) __arm_streaming {
   return SVE_ACLE_FUNC(svmin,_f64_x2)(zdn, zm);
@@ -1128,35 +528,13 @@ svfloat64x2_t test_svmin_f64_x2(svfloat64x2_t zdn, svfloat64x2_t zm) __arm_strea
 // CHECK-LABEL: @test_svmin_s8_x4(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[RETVAL:%.*]] = alloca { , , , }, align 16
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.smin.x4.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0)
-// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16)
-// CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2
-// CHECK-NEXT:    [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32)
-// CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
-// CHECK-NEXT:    [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48)
-// CHECK-NEXT:    store [[TMP8]], ptr [[RETVAL]], align 16
-// CHECK-NEXT:    [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
-// CHECK-NEXT:    ret { , , , } [[TMP9]]
+// CHECK-NEXT:    ret { , , , } [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z16test_svmin_s8_x410svint8x4_tS_(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[RETVAL:%.*]] = alloca { , , , }, align 16
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.smin.x4.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16)
-// CPP-CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2
-// CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32)
-// CPP-CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
-// CPP-CHECK-NEXT:    [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48)
-// CPP-CHECK-NEXT:    store [[TMP8]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT:    [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT:    ret { , , , } [[TMP9]]
+// CPP-CHECK-NEXT:    ret { , , , } [[TMP0]]
 //
 svint8x4_t test_svmin_s8_x4(svint8x4_t zdn, svint8x4_t zm) __arm_streaming {
   return SVE_ACLE_FUNC(svmin,_s8_x4)(zdn, zm);
@@ -1164,35 +542,13 @@ svint8x4_t test_svmin_s8_x4(svint8x4_t zdn, svint8x4_t zm) __arm_streaming {
 // CHECK-LABEL: @test_svmin_s16_x4(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[RETVAL:%.*]] = alloca { , , , }, align 16
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.smin.x4.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0)
-// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8)
-// CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2
-// CHECK-NEXT:    [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16)
-// CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
-// CHECK-NEXT:    [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24)
-// CHECK-NEXT:    store [[TMP8]], ptr [[RETVAL]], align 16
-// CHECK-NEXT:    [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
-// CHECK-NEXT:    ret { , , , } [[TMP9]]
+// CHECK-NEXT:    ret { , , , } [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z17test_svmin_s16_x411svint16x4_tS_(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[RETVAL:%.*]] = alloca { , , , }, align 16
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.smin.x4.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8)
-// CPP-CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2
-// CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16)
-// CPP-CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
-// CPP-CHECK-NEXT:    [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24)
-// CPP-CHECK-NEXT:    store [[TMP8]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT:    [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT:    ret { , , , } [[TMP9]]
+// CPP-CHECK-NEXT:    ret { , , , } [[TMP0]]
 //
 svint16x4_t test_svmin_s16_x4(svint16x4_t zdn, svint16x4_t zm) __arm_streaming {
   return SVE_ACLE_FUNC(svmin,_s16_x4)(zdn, zm);
@@ -1200,35 +556,13 @@ svint16x4_t test_svmin_s16_x4(svint16x4_t zdn, svint16x4_t zm) __arm_streaming {
 // CHECK-LABEL: @test_svmin_s32_x4(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[RETVAL:%.*]] = alloca { , , , }, align 16
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.smin.x4.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0)
-// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4)
-// CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2
-// CHECK-NEXT:    [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8)
-// CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
-// CHECK-NEXT:    [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12)
-// CHECK-NEXT:    store [[TMP8]], ptr [[RETVAL]], align 16
-// CHECK-NEXT:    [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
-// CHECK-NEXT:    ret { , , , } [[TMP9]]
+// CHECK-NEXT:    ret { , , , } [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z17test_svmin_s32_x411svint32x4_tS_(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[RETVAL:%.*]] = alloca { , , , }, align 16
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.smin.x4.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4)
-// CPP-CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2
-// CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8)
-// CPP-CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
-// CPP-CHECK-NEXT:    [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12)
-// CPP-CHECK-NEXT:    store [[TMP8]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT:    [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT:    ret { , , , } [[TMP9]]
+// CPP-CHECK-NEXT:    ret { , , , } [[TMP0]]
 //
 svint32x4_t test_svmin_s32_x4(svint32x4_t zdn, svint32x4_t zm) __arm_streaming {
   return SVE_ACLE_FUNC(svmin,_s32_x4)(zdn, zm);
@@ -1236,35 +570,13 @@ svint32x4_t test_svmin_s32_x4(svint32x4_t zdn, svint32x4_t zm) __arm_streaming {
 // CHECK-LABEL: @test_svmin_s64_x4(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[RETVAL:%.*]] = alloca { , , , }, align 16
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.smin.x4.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0)
-// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2)
-// CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2
-// CHECK-NEXT:    [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4)
-// CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
-// CHECK-NEXT:    [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6)
-// CHECK-NEXT:    store [[TMP8]], ptr [[RETVAL]], align 16
-// CHECK-NEXT:    [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
-// CHECK-NEXT:    ret { , , , } [[TMP9]]
+// CHECK-NEXT:    ret { , , , } [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z17test_svmin_s64_x411svint64x4_tS_(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[RETVAL:%.*]] = alloca { , , , }, align 16
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.smin.x4.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2)
-// CPP-CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2
-// CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4)
-// CPP-CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
-// CPP-CHECK-NEXT:    [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6)
-// CPP-CHECK-NEXT:    store [[TMP8]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT:    [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT:    ret { , , , } [[TMP9]]
+// CPP-CHECK-NEXT:    ret { , , , } [[TMP0]]
 //
 svint64x4_t test_svmin_s64_x4(svint64x4_t zdn, svint64x4_t zm) __arm_streaming {
   return SVE_ACLE_FUNC(svmin,_s64_x4)(zdn, zm);
@@ -1272,35 +584,13 @@ svint64x4_t test_svmin_s64_x4(svint64x4_t zdn, svint64x4_t zm) __arm_streaming {
 // CHECK-LABEL: @test_svmin_u8_x4(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[RETVAL:%.*]] = alloca { , , , }, align 16
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.umin.x4.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0)
-// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16)
-// CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2
-// CHECK-NEXT:    [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32)
-// CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
-// CHECK-NEXT:    [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48)
-// CHECK-NEXT:    store [[TMP8]], ptr [[RETVAL]], align 16
-// CHECK-NEXT:    [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
-// CHECK-NEXT:    ret { , , , } [[TMP9]]
+// CHECK-NEXT:    ret { , , , } [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z16test_svmin_u8_x411svuint8x4_tS_(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[RETVAL:%.*]] = alloca { , , , }, align 16
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.umin.x4.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16)
-// CPP-CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2
-// CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32)
-// CPP-CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
-// CPP-CHECK-NEXT:    [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48)
-// CPP-CHECK-NEXT:    store [[TMP8]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT:    [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT:    ret { , , , } [[TMP9]]
+// CPP-CHECK-NEXT:    ret { , , , } [[TMP0]]
 //
 svuint8x4_t test_svmin_u8_x4(svuint8x4_t zdn, svuint8x4_t zm) __arm_streaming {
   return SVE_ACLE_FUNC(svmin,_u8_x4)(zdn, zm);
@@ -1308,35 +598,13 @@ svuint8x4_t test_svmin_u8_x4(svuint8x4_t zdn, svuint8x4_t zm) __arm_streaming {
 // CHECK-LABEL: @test_svmin_u16_x4(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[RETVAL:%.*]] = alloca { , , , }, align 16
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.umin.x4.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0)
-// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8)
-// CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2
-// CHECK-NEXT:    [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16)
-// CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
-// CHECK-NEXT:    [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24)
-// CHECK-NEXT:    store [[TMP8]], ptr [[RETVAL]], align 16
-// CHECK-NEXT:    [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
 svuint64x4_t test_svmin_u64_x4(svuint64x4_t zdn, svuint64x4_t zm) __arm_streaming {
   return SVE_ACLE_FUNC(svmin,_u64_x4)(zdn, zm);
@@ -1416,35 +640,13 @@ svuint64x4_t test_svmin_u64_x4(svuint64x4_t zdn, svuint64x4_t zm) __arm_streamin
 // CHECK-LABEL: @test_svmin_bf16_x4(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[RETVAL:%.*]] = alloca { , , , }, align 16
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmin.x4.nxv8bf16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP1]], i64 0)
-// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8)
-// CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2
-// CHECK-NEXT:    [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 16)
-// CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
-// CHECK-NEXT:    [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 24)
-// CHECK-NEXT:    store [[TMP8]], ptr [[RETVAL]], align 16
-// CHECK-NEXT:    [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
-// CHECK-NEXT:    ret { , , , } [[TMP9]]
+// CHECK-NEXT:    ret { , , , } [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z18test_svmin_bf16_x414svbfloat16x4_tS_(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[RETVAL:%.*]] = alloca { , , , }, align 16
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmin.x4.nxv8bf16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP1]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8)
-// CPP-CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2
-// CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 16)
-// CPP-CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
-// CPP-CHECK-NEXT:    [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 24)
-// CPP-CHECK-NEXT:    store [[TMP8]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT:    [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT:    ret { , , , } [[TMP9]]
+// CPP-CHECK-NEXT:    ret { , , , } [[TMP0]]
 //
 svbfloat16x4_t test_svmin_bf16_x4(svbfloat16x4_t zdn, svbfloat16x4_t zm) __arm_streaming {
   return SVE_ACLE_FUNC(svmin,_bf16_x4)(zdn, zm);
@@ -1452,35 +654,13 @@ svbfloat16x4_t test_svmin_bf16_x4(svbfloat16x4_t zdn, svbfloat16x4_t zm) __arm_s
 // CHECK-LABEL: @test_svmin_f16_x4(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[RETVAL:%.*]] = alloca { , , , }, align 16
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmin.x4.nxv8f16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0)
-// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8)
-// CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2
-// CHECK-NEXT:    [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16)
-// CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
-// CHECK-NEXT:    [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24)
-// CHECK-NEXT:    store [[TMP8]], ptr [[RETVAL]], align 16
-// CHECK-NEXT:    [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
-// CHECK-NEXT:    ret { , , , } [[TMP9]]
+// CHECK-NEXT:    ret { , , , } [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z17test_svmin_f16_x413svfloat16x4_tS_(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[RETVAL:%.*]] = alloca { , , , }, align 16
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmin.x4.nxv8f16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8)
-// CPP-CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2
-// CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16)
-// CPP-CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
-// CPP-CHECK-NEXT:    [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24)
-// CPP-CHECK-NEXT:    store [[TMP8]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT:    [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT:    ret { , , , } [[TMP9]]
+// CPP-CHECK-NEXT:    ret { , , , } [[TMP0]]
 //
 svfloat16x4_t test_svmin_f16_x4(svfloat16x4_t zdn, svfloat16x4_t zm) __arm_streaming {
   return SVE_ACLE_FUNC(svmin,_f16_x4)(zdn, zm);
@@ -1488,35 +668,13 @@ svfloat16x4_t test_svmin_f16_x4(svfloat16x4_t zdn, svfloat16x4_t zm) __arm_strea
 // CHECK-LABEL: @test_svmin_f32_x4(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[RETVAL:%.*]] = alloca { , , , }, align 16
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmin.x4.nxv4f32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0)
-// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4)
-// CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2
-// CHECK-NEXT:    [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8)
-// CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
-// CHECK-NEXT:    [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12)
-// CHECK-NEXT:    store [[TMP8]], ptr [[RETVAL]], align 16
-// CHECK-NEXT:    [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
-// CHECK-NEXT:    ret { , , , } [[TMP9]]
+// CHECK-NEXT:    ret { , , , } [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z17test_svmin_f32_x413svfloat32x4_tS_(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[RETVAL:%.*]] = alloca { , , , }, align 16
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmin.x4.nxv4f32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4)
-// CPP-CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2
-// CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8)
-// CPP-CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
-// CPP-CHECK-NEXT:    [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12)
-// CPP-CHECK-NEXT:    store [[TMP8]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT:    [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT:    ret { , , , } [[TMP9]]
+// CPP-CHECK-NEXT:    ret { , , , } [[TMP0]]
 //
 svfloat32x4_t test_svmin_f32_x4(svfloat32x4_t zdn, svfloat32x4_t zm) __arm_streaming {
   return SVE_ACLE_FUNC(svmin,_f32_x4)(zdn, zm);
@@ -1524,35 +682,13 @@ svfloat32x4_t test_svmin_f32_x4(svfloat32x4_t zdn, svfloat32x4_t zm) __arm_strea
 // CHECK-LABEL: @test_svmin_f64_x4(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[RETVAL:%.*]] = alloca { , , , }, align 16
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmin.x4.nxv2f64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0)
-// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2)
-// CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2
-// CHECK-NEXT:    [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 4)
-// CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
-// CHECK-NEXT:    [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6)
-// CHECK-NEXT:    store [[TMP8]], ptr [[RETVAL]], align 16
-// CHECK-NEXT:    [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
-// CHECK-NEXT:    ret { , , , } [[TMP9]]
+// CHECK-NEXT:    ret { , , , } [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z17test_svmin_f64_x413svfloat64x4_tS_(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[RETVAL:%.*]] = alloca { , , , }, align 16
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmin.x4.nxv2f64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0)
CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat64x4_t test_svmin_f64_x4(svfloat64x4_t zdn, svfloat64x4_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmin,_f64_x4)(zdn, zm); diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_minnm.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_minnm.c index 71b8914b816ca..838cb644e5e39 100644 --- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_minnm.c +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_minnm.c @@ -19,27 +19,13 @@ // CHECK-LABEL: @test_svminnm_single_bf16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fminnm.single.x2.nxv8bf16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z27test_svminnm_single_bf16_x214svbfloat16x2_tu14__SVBfloat16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fminnm.single.x2.nxv8bf16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svbfloat16x2_t test_svminnm_single_bf16_x2(svbfloat16x2_t zdn, svbfloat16_t zm) __arm_streaming { return SVE_ACLE_FUNC(svminnm,_single_bf16_x2)(zdn, zm); @@ -47,27 +33,13 @@ svbfloat16x2_t test_svminnm_single_bf16_x2(svbfloat16x2_t zdn, svbfloat16_t zm) // CHECK-LABEL: @test_svminnm_single_f16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fminnm.single.x2.nxv8f16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call 
@llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z26test_svminnm_single_f16_x213svfloat16x2_tu13__SVFloat16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fminnm.single.x2.nxv8f16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat16x2_t test_svminnm_single_f16_x2(svfloat16x2_t zdn, svfloat16_t zm) __arm_streaming { return SVE_ACLE_FUNC(svminnm,_single_f16_x2)(zdn, zm); @@ -75,27 +47,13 @@ svfloat16x2_t test_svminnm_single_f16_x2(svfloat16x2_t zdn, svfloat16_t zm) __ar // CHECK-LABEL: @test_svminnm_single_f32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fminnm.single.x2.nxv4f32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z26test_svminnm_single_f32_x213svfloat32x2_tu13__SVFloat32_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fminnm.single.x2.nxv4f32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat32x2_t test_svminnm_single_f32_x2(svfloat32x2_t zdn, svfloat32_t zm) __arm_streaming { return SVE_ACLE_FUNC(svminnm,_single_f32_x2)(zdn, zm); @@ -103,27 +61,13 @@ svfloat32x2_t test_svminnm_single_f32_x2(svfloat32x2_t zdn, svfloat32_t zm) __ar // CHECK-LABEL: @test_svminnm_single_f64_x2( // CHECK-NEXT: 
entry:
-// CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 2 x double>, <vscale x 2 x double> }, align 16
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.fminnm.single.x2.nxv2f64(<vscale x 2 x double> [[ZDN_COERCE0:%.*]], <vscale x 2 x double> [[ZDN_COERCE1:%.*]], <vscale x 2 x double> [[ZM:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } [[TMP0]], 0
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x double> @llvm.vector.insert.nxv4f64.nxv2f64(<vscale x 4 x double> poison, <vscale x 2 x double> [[TMP1]], i64 0)
-// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } [[TMP0]], 1
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 4 x double> @llvm.vector.insert.nxv4f64.nxv2f64(<vscale x 4 x double> [[TMP2]], <vscale x 2 x double> [[TMP3]], i64 2)
-// CHECK-NEXT:    store <vscale x 4 x double> [[TMP4]], ptr [[RETVAL]], align 16
-// CHECK-NEXT:    [[TMP5:%.*]] = load { <vscale x 2 x double>, <vscale x 2 x double> }, ptr [[RETVAL]], align 16
-// CHECK-NEXT:    ret { <vscale x 2 x double>, <vscale x 2 x double> } [[TMP5]]
+// CHECK-NEXT:    ret { <vscale x 2 x double>, <vscale x 2 x double> } [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z26test_svminnm_single_f64_x213svfloat64x2_tu13__SVFloat64_t(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 2 x double>, <vscale x 2 x double> }, align 16
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.fminnm.single.x2.nxv2f64(<vscale x 2 x double> [[ZDN_COERCE0:%.*]], <vscale x 2 x double> [[ZDN_COERCE1:%.*]], <vscale x 2 x double> [[ZM:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } [[TMP0]], 0
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x double> @llvm.vector.insert.nxv4f64.nxv2f64(<vscale x 4 x double> poison, <vscale x 2 x double> [[TMP1]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } [[TMP0]], 1
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 4 x double> @llvm.vector.insert.nxv4f64.nxv2f64(<vscale x 4 x double> [[TMP2]], <vscale x 2 x double> [[TMP3]], i64 2)
-// CPP-CHECK-NEXT:    store <vscale x 4 x double> [[TMP4]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT:    [[TMP5:%.*]] = load { <vscale x 2 x double>, <vscale x 2 x double> }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT:    ret { <vscale x 2 x double>, <vscale x 2 x double> } [[TMP5]]
+// CPP-CHECK-NEXT:    ret { <vscale x 2 x double>, <vscale x 2 x double> } [[TMP0]]
 //
 svfloat64x2_t test_svminnm_single_f64_x2(svfloat64x2_t zdn, svfloat64_t zm) __arm_streaming {
   return SVE_ACLE_FUNC(svminnm,_single_f64_x2)(zdn, zm);
@@ -133,35 +77,13 @@ svfloat64x2_t test_svminnm_single_f64_x2(svfloat64x2_t zdn, svfloat64_t zm) __ar
 // CHECK-LABEL: @test_svminnm_single_bf16_x4(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> }, align 16
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.fminnm.single.x4.nxv8bf16(<vscale x 8 x bfloat> [[ZDN_COERCE0:%.*]], <vscale x 8 x bfloat> [[ZDN_COERCE1:%.*]], <vscale x 8 x bfloat> [[ZDN_COERCE2:%.*]], <vscale x 8 x bfloat> [[ZDN_COERCE3:%.*]], <vscale x 8 x bfloat> [[ZM:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP0]], 0
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 32 x bfloat> @llvm.vector.insert.nxv32bf16.nxv8bf16(<vscale x 32 x bfloat> poison, <vscale x 8 x bfloat> [[TMP1]], i64 0)
-// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP0]], 1
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 32 x bfloat> @llvm.vector.insert.nxv32bf16.nxv8bf16(<vscale x 32 x bfloat> [[TMP2]], <vscale x 8 x bfloat> [[TMP3]], i64 8)
-// CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP0]], 2
-// CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 32 x bfloat> @llvm.vector.insert.nxv32bf16.nxv8bf16(<vscale x 32 x bfloat> [[TMP4]], <vscale x 8 x bfloat> [[TMP5]], i64 16)
-// CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP0]], 3
-// CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 32 x bfloat> @llvm.vector.insert.nxv32bf16.nxv8bf16(<vscale x 32 x bfloat> [[TMP6]], <vscale x 8 x bfloat> [[TMP7]], i64 24)
-// CHECK-NEXT:    store <vscale x 32 x bfloat> [[TMP8]], ptr [[RETVAL]], align 16
-// CHECK-NEXT:    [[TMP9:%.*]] = load { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> }, ptr [[RETVAL]], align 16
-// CHECK-NEXT:    ret { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP9]]
+// CHECK-NEXT:    ret { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z27test_svminnm_single_bf16_x414svbfloat16x4_tu14__SVBfloat16_t(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> }, align 16
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.fminnm.single.x4.nxv8bf16(<vscale x 8 x bfloat> [[ZDN_COERCE0:%.*]], <vscale x 8 x bfloat> [[ZDN_COERCE1:%.*]], <vscale x 8 x bfloat> [[ZDN_COERCE2:%.*]], <vscale x 8 x bfloat> [[ZDN_COERCE3:%.*]], <vscale x 8 x bfloat> [[ZM:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP0]], 0
-// CPP-CHECK-NEXT:
[[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svbfloat16x4_t test_svminnm_single_bf16_x4(svbfloat16x4_t zdn, svbfloat16_t zm) __arm_streaming { return SVE_ACLE_FUNC(svminnm,_single_bf16_x4)(zdn, zm); @@ -169,35 +91,13 @@ svbfloat16x4_t test_svminnm_single_bf16_x4(svbfloat16x4_t zdn, svbfloat16_t zm) // CHECK-LABEL: @test_svminnm_single_f16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fminnm.single.x4.nxv8f16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z26test_svminnm_single_f16_x413svfloat16x4_tu13__SVFloat16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fminnm.single.x4.nxv8f16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], 
align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat16x4_t test_svminnm_single_f16_x4(svfloat16x4_t zdn, svfloat16_t zm) __arm_streaming { return SVE_ACLE_FUNC(svminnm,_single_f16_x4)(zdn, zm); @@ -205,35 +105,13 @@ svfloat16x4_t test_svminnm_single_f16_x4(svfloat16x4_t zdn, svfloat16_t zm) __ar // CHECK-LABEL: @test_svminnm_single_f32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fminnm.single.x4.nxv4f32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z26test_svminnm_single_f32_x413svfloat32x4_tu13__SVFloat32_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fminnm.single.x4.nxv4f32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat32x4_t test_svminnm_single_f32_x4(svfloat32x4_t zdn, svfloat32_t zm) __arm_streaming { return SVE_ACLE_FUNC(svminnm,_single_f32_x4)(zdn, zm); @@ -241,35 +119,13 @@ svfloat32x4_t test_svminnm_single_f32_x4(svfloat32x4_t zdn, svfloat32_t zm) __ar // CHECK-LABEL: @test_svminnm_single_f64_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fminnm.single.x4.nxv2f64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call 
@llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z26test_svminnm_single_f64_x413svfloat64x4_tu13__SVFloat64_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fminnm.single.x4.nxv2f64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat64x4_t test_svminnm_single_f64_x4(svfloat64x4_t zdn, svfloat64_t zm) __arm_streaming { return SVE_ACLE_FUNC(svminnm,_single_f64_x4)(zdn, zm); @@ -279,27 +135,13 @@ svfloat64x4_t test_svminnm_single_f64_x4(svfloat64x4_t zdn, svfloat64_t zm) __ar // CHECK-LABEL: @test_svminnm_multi_bf16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fminnm.x2.nxv8bf16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z26test_svminnm_multi_bf16_x214svbfloat16x2_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fminnm.x2.nxv8bf16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } 
[[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svbfloat16x2_t test_svminnm_multi_bf16_x2(svbfloat16x2_t zdn, svbfloat16x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svminnm,_bf16_x2)(zdn, zm); @@ -307,27 +149,13 @@ svbfloat16x2_t test_svminnm_multi_bf16_x2(svbfloat16x2_t zdn, svbfloat16x2_t zm) // CHECK-LABEL: @test_svminnm_multi_f16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fminnm.x2.nxv8f16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z25test_svminnm_multi_f16_x213svfloat16x2_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fminnm.x2.nxv8f16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat16x2_t test_svminnm_multi_f16_x2(svfloat16x2_t zdn, svfloat16x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svminnm,_f16_x2)(zdn, zm); @@ -335,27 +163,13 @@ svfloat16x2_t test_svminnm_multi_f16_x2(svfloat16x2_t zdn, svfloat16x2_t zm) __a // CHECK-LABEL: @test_svminnm_multi_f32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fminnm.x2.nxv4f32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: 
@_Z25test_svminnm_multi_f32_x213svfloat32x2_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fminnm.x2.nxv4f32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat32x2_t test_svminnm_multi_f32_x2(svfloat32x2_t zdn, svfloat32x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svminnm,_f32_x2)(zdn, zm); @@ -363,27 +177,13 @@ svfloat32x2_t test_svminnm_multi_f32_x2(svfloat32x2_t zdn, svfloat32x2_t zm) __a // CHECK-LABEL: @test_svminnm_multi_f64_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fminnm.x2.nxv2f64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z25test_svminnm_multi_f64_x213svfloat64x2_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fminnm.x2.nxv2f64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat64x2_t test_svminnm_multi_f64_x2(svfloat64x2_t zdn, svfloat64x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svminnm,_f64_x2)(zdn, zm); @@ -393,35 +193,13 @@ svfloat64x2_t test_svminnm_multi_f64_x2(svfloat64x2_t zdn, svfloat64x2_t zm) __a // CHECK-LABEL: @test_svminnm_multi_bf16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fminnm.x4.nxv8bf16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call 
@llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z26test_svminnm_multi_bf16_x414svbfloat16x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fminnm.x4.nxv8bf16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svbfloat16x4_t test_svminnm_multi_bf16_x4(svbfloat16x4_t zdn, svbfloat16x4_t zm) __arm_streaming { return SVE_ACLE_FUNC(svminnm,_bf16_x4)(zdn, zm); @@ -429,35 +207,13 @@ svbfloat16x4_t test_svminnm_multi_bf16_x4(svbfloat16x4_t zdn, svbfloat16x4_t zm) // CHECK-LABEL: @test_svminnm_multi_f16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fminnm.x4.nxv8f16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// 
CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z25test_svminnm_multi_f16_x413svfloat16x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fminnm.x4.nxv8f16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat16x4_t test_svminnm_multi_f16_x4(svfloat16x4_t zdn, svfloat16x4_t zm) __arm_streaming { return SVE_ACLE_FUNC(svminnm,_f16_x4)(zdn, zm); @@ -465,35 +221,13 @@ svfloat16x4_t test_svminnm_multi_f16_x4(svfloat16x4_t zdn, svfloat16x4_t zm) __a // CHECK-LABEL: @test_svminnm_multi_f32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fminnm.x4.nxv4f32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z25test_svminnm_multi_f32_x413svfloat32x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fminnm.x4.nxv4f32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail 
call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat32x4_t test_svminnm_multi_f32_x4(svfloat32x4_t zdn, svfloat32x4_t zm) __arm_streaming { return SVE_ACLE_FUNC(svminnm,_f32_x4)(zdn, zm); @@ -501,35 +235,13 @@ svfloat32x4_t test_svminnm_multi_f32_x4(svfloat32x4_t zdn, svfloat32x4_t zm) __a // CHECK-LABEL: @test_svminnm_multi_f64_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fminnm.x4.nxv2f64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z25test_svminnm_multi_f64_x413svfloat64x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fminnm.x4.nxv2f64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// 
CPP-CHECK-NEXT:    [[TMP9:%.*]] = load { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT:    ret { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP9]]
+// CPP-CHECK-NEXT:    ret { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP0]]
 //
 svfloat64x4_t test_svminnm_multi_f64_x4(svfloat64x4_t zdn, svfloat64x4_t zm) __arm_streaming {
   return SVE_ACLE_FUNC(svminnm,_f64_x4)(zdn, zm);
diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_read.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_read.c
index da17c6b13d17c..b8cd1e1653ea9 100644
--- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_read.c
+++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_read.c
@@ -9,27 +9,13 @@
 // CHECK-LABEL: @test_svread_ver_za8_u8_vg2(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 16 x i8>, <vscale x 16 x i8> }, align 16
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sme.read.ver.vg2.nxv16i8(i32 0, i32 [[BASE:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 0
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> poison, <vscale x 16 x i8> [[TMP1]], i64 0)
-// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 1
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]], i64 16)
-// CHECK-NEXT:    store <vscale x 32 x i8> [[TMP4]], ptr [[RETVAL]], align 16
-// CHECK-NEXT:    [[TMP5:%.*]] = load { <vscale x 16 x i8>, <vscale x 16 x i8> }, ptr [[RETVAL]], align 16
-// CHECK-NEXT:    ret { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP5]]
+// CHECK-NEXT:    ret { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z26test_svread_ver_za8_u8_vg2j(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 16 x i8>, <vscale x 16 x i8> }, align 16
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sme.read.ver.vg2.nxv16i8(i32 0, i32 [[BASE:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 0
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> poison, <vscale x 16 x i8> [[TMP1]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 1
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]], i64 16)
-// CPP-CHECK-NEXT:    store <vscale x 32 x i8> [[TMP4]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT:    [[TMP5:%.*]] = load { <vscale x 16 x i8>, <vscale x 16 x i8> }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT:    ret { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP5]]
+// CPP-CHECK-NEXT:    ret { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]]
 //
 svuint8x2_t test_svread_ver_za8_u8_vg2(uint32_t base) __arm_streaming __arm_in("za") {
   return svread_ver_za8_u8_vg2(0, base);
@@ -37,27 +23,13 @@ svuint8x2_t test_svread_ver_za8_u8_vg2(uint32_t base) __arm_streaming __arm_in("
 // CHECK-LABEL: @test_svread_ver_za8_s8_vg2(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 16 x i8>, <vscale x 16 x i8> }, align 16
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sme.read.ver.vg2.nxv16i8(i32 0, i32 [[BASE:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 0
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> poison, <vscale x 16 x i8> [[TMP1]], i64 0)
-// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 1
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]], i64 16)
-// CHECK-NEXT:    store <vscale x 32 x i8> [[TMP4]], ptr [[RETVAL]], align 16
-// CHECK-NEXT:    [[TMP5:%.*]] = load { <vscale x 16 x i8>, <vscale x 16 x i8> }, ptr [[RETVAL]], align 16
-// CHECK-NEXT:    ret { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP5]]
+// CHECK-NEXT:    ret { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z26test_svread_ver_za8_s8_vg2j(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 16 x i8>, <vscale x 16 x i8> }, align 16
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sme.read.ver.vg2.nxv16i8(i32 0, i32 [[BASE:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 0
-// CPP-CHECK-NEXT:
[[TMP2:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> poison, <vscale x 16 x i8> [[TMP1]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 1
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]], i64 16)
-// CPP-CHECK-NEXT:    store <vscale x 32 x i8> [[TMP4]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT:    [[TMP5:%.*]] = load { <vscale x 16 x i8>, <vscale x 16 x i8> }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT:    ret { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP5]]
+// CPP-CHECK-NEXT:    ret { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]]
 //
 svint8x2_t test_svread_ver_za8_s8_vg2(uint32_t base) __arm_streaming __arm_in("za") {
   return svread_ver_za8_s8_vg2(0, base);
@@ -65,27 +37,13 @@ svint8x2_t test_svread_ver_za8_s8_vg2(uint32_t base) __arm_streaming __arm_in("z
 // CHECK-LABEL: @test_svread_hor_za8_u8_vg2(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 16 x i8>, <vscale x 16 x i8> }, align 16
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sme.read.hor.vg2.nxv16i8(i32 0, i32 [[BASE:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 0
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> poison, <vscale x 16 x i8> [[TMP1]], i64 0)
-// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 1
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]], i64 16)
-// CHECK-NEXT:    store <vscale x 32 x i8> [[TMP4]], ptr [[RETVAL]], align 16
-// CHECK-NEXT:    [[TMP5:%.*]] = load { <vscale x 16 x i8>, <vscale x 16 x i8> }, ptr [[RETVAL]], align 16
-// CHECK-NEXT:    ret { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP5]]
+// CHECK-NEXT:    ret { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z26test_svread_hor_za8_u8_vg2j(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 16 x i8>, <vscale x 16 x i8> }, align 16
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sme.read.hor.vg2.nxv16i8(i32 0, i32 [[BASE:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 0
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> poison, <vscale x 16 x i8> [[TMP1]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 1
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]], i64 16)
-// CPP-CHECK-NEXT:    store <vscale x 32 x i8> [[TMP4]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT:    [[TMP5:%.*]] = load { <vscale x 16 x i8>, <vscale x 16 x i8> }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT:    ret { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP5]]
+// CPP-CHECK-NEXT:    ret { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]]
 //
 svuint8x2_t test_svread_hor_za8_u8_vg2(uint32_t base) __arm_streaming __arm_in("za") {
   return svread_hor_za8_u8_vg2(0, base);
@@ -93,27 +51,13 @@ svuint8x2_t test_svread_hor_za8_u8_vg2(uint32_t base) __arm_streaming __arm_in("
 // CHECK-LABEL: @test_svread_hor_za8_s8_vg2(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 16 x i8>, <vscale x 16 x i8> }, align 16
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sme.read.hor.vg2.nxv16i8(i32 0, i32 [[BASE:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 0
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> poison, <vscale x 16 x i8> [[TMP1]], i64 0)
-// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 1
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]], i64 16)
-// CHECK-NEXT:    store <vscale x 32 x i8> [[TMP4]], ptr [[RETVAL]], align 16
-// CHECK-NEXT:    [[TMP5:%.*]] = load { <vscale x 16 x i8>, <vscale x 16 x i8> }, ptr [[RETVAL]], align 16
-// CHECK-NEXT:    ret { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP5]]
+// CHECK-NEXT:    ret { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z26test_svread_hor_za8_s8_vg2j(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 16 x i8>, <vscale x 16 x i8> }, align 16
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sme.read.hor.vg2.nxv16i8(i32 0, i32 [[BASE:%.*]])
-// CPP-CHECK-NEXT:
[[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint8x2_t test_svread_hor_za8_s8_vg2(uint32_t base) __arm_streaming __arm_in("za") { return svread_hor_za8_s8_vg2(0, base); @@ -121,35 +65,13 @@ svint8x2_t test_svread_hor_za8_s8_vg2(uint32_t base) __arm_streaming __arm_in("z // CHECK-LABEL: @test_svread_hor_za8_u8_vg4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.hor.vg4.nxv16i8(i32 0, i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z26test_svread_hor_za8_u8_vg4j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.hor.vg4.nxv16i8(i32 0, i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint8x4_t test_svread_hor_za8_u8_vg4(uint32_t base) __arm_streaming __arm_in("za") { return svread_hor_za8_u8_vg4(0, base); @@ -157,35 +79,13 @@ svuint8x4_t test_svread_hor_za8_u8_vg4(uint32_t base) __arm_streaming __arm_in(" // CHECK-LABEL: @test_svread_hor_za8_s8_vg4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } 
@llvm.aarch64.sme.read.hor.vg4.nxv16i8(i32 0, i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z26test_svread_hor_za8_s8_vg4j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.hor.vg4.nxv16i8(i32 0, i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint8x4_t test_svread_hor_za8_s8_vg4(uint32_t base) __arm_streaming __arm_in("za") { return svread_hor_za8_s8_vg4(0, base); @@ -193,35 +93,13 @@ svint8x4_t test_svread_hor_za8_s8_vg4(uint32_t base) __arm_streaming __arm_in("z // CHECK-LABEL: @test_svread_ver_za8_u8_vg4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.ver.vg4.nxv16i8(i32 0, i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: 
@_Z26test_svread_ver_za8_u8_vg4j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.ver.vg4.nxv16i8(i32 0, i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint8x4_t test_svread_ver_za8_u8_vg4(uint32_t base) __arm_streaming __arm_in("za") { return svread_ver_za8_u8_vg4(0, base); @@ -229,35 +107,13 @@ svuint8x4_t test_svread_ver_za8_u8_vg4(uint32_t base) __arm_streaming __arm_in(" // CHECK-LABEL: @test_svread_ver_za8_s8_vg4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.ver.vg4.nxv16i8(i32 0, i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z26test_svread_ver_za8_s8_vg4j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.ver.vg4.nxv16i8(i32 0, i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CPP-CHECK-NEXT: store 
[[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint8x4_t test_svread_ver_za8_s8_vg4(uint32_t base) __arm_streaming __arm_in("za") { return svread_ver_za8_s8_vg4(0, base); @@ -265,27 +121,13 @@ svint8x4_t test_svread_ver_za8_s8_vg4(uint32_t base) __arm_streaming __arm_in("z // CHECK-LABEL: @test_svread_hor_za16_u16_vg2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.hor.vg2.nxv8i16(i32 1, i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z28test_svread_hor_za16_u16_vg2j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.hor.vg2.nxv8i16(i32 1, i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint16x2_t test_svread_hor_za16_u16_vg2(uint32_t base) __arm_streaming __arm_in("za") { return svread_hor_za16_u16_vg2(1, base); @@ -293,27 +135,13 @@ svuint16x2_t test_svread_hor_za16_u16_vg2(uint32_t base) __arm_streaming __arm_i // CHECK-LABEL: @test_svread_hor_za16_bf16_vg2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.hor.vg2.nxv8bf16(i32 1, i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z29test_svread_hor_za16_bf16_vg2j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.hor.vg2.nxv8bf16(i32 1, i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] 
= tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svbfloat16x2_t test_svread_hor_za16_bf16_vg2(uint32_t base) __arm_streaming __arm_in("za") { return svread_hor_za16_bf16_vg2(1, base); @@ -321,27 +149,13 @@ svbfloat16x2_t test_svread_hor_za16_bf16_vg2(uint32_t base) __arm_streaming __ar // CHECK-LABEL: @test_svread_hor_za16_f16_vg2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.hor.vg2.nxv8f16(i32 1, i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z28test_svread_hor_za16_f16_vg2j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.hor.vg2.nxv8f16(i32 1, i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat16x2_t test_svread_hor_za16_f16_vg2(uint32_t base) __arm_streaming __arm_in("za") { return svread_hor_za16_f16_vg2(1, base); @@ -349,27 +163,13 @@ svfloat16x2_t test_svread_hor_za16_f16_vg2(uint32_t base) __arm_streaming __arm_ // CHECK-LABEL: @test_svread_hor_za16_s16_vg2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.hor.vg2.nxv8i16(i32 1, i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z28test_svread_hor_za16_s16_vg2j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.hor.vg2.nxv8i16(i32 1, i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// 
CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint16x2_t test_svread_hor_za16_s16_vg2(uint32_t base) __arm_streaming __arm_in("za") { return svread_hor_za16_s16_vg2(1, base); @@ -377,27 +177,13 @@ svint16x2_t test_svread_hor_za16_s16_vg2(uint32_t base) __arm_streaming __arm_in // CHECK-LABEL: @test_svread_ver_za16_u16_vg2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.ver.vg2.nxv8i16(i32 1, i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z28test_svread_ver_za16_u16_vg2j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.ver.vg2.nxv8i16(i32 1, i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint16x2_t test_svread_ver_za16_u16_vg2(uint32_t base) __arm_streaming __arm_in("za") { return svread_ver_za16_u16_vg2(1, base); @@ -405,27 +191,13 @@ svuint16x2_t test_svread_ver_za16_u16_vg2(uint32_t base) __arm_streaming __arm_i // CHECK-LABEL: @test_svread_ver_za16_bf16_vg2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.ver.vg2.nxv8bf16(i32 1, i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z29test_svread_ver_za16_bf16_vg2j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.ver.vg2.nxv8bf16(i32 1, i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// 
CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svbfloat16x2_t test_svread_ver_za16_bf16_vg2(uint32_t base) __arm_streaming __arm_in("za") { return svread_ver_za16_bf16_vg2(1, base); @@ -433,27 +205,13 @@ svbfloat16x2_t test_svread_ver_za16_bf16_vg2(uint32_t base) __arm_streaming __ar // CHECK-LABEL: @test_svread_ver_za16_f16_vg2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.ver.vg2.nxv8f16(i32 1, i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z28test_svread_ver_za16_f16_vg2j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.ver.vg2.nxv8f16(i32 1, i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat16x2_t test_svread_ver_za16_f16_vg2(uint32_t base) __arm_streaming __arm_in("za") { return svread_ver_za16_f16_vg2(1, base); @@ -461,27 +219,13 @@ svfloat16x2_t test_svread_ver_za16_f16_vg2(uint32_t base) __arm_streaming __arm_ // CHECK-LABEL: @test_svread_ver_za16_s16_vg2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.ver.vg2.nxv8i16(i32 1, i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z28test_svread_ver_za16_s16_vg2j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } 
@llvm.aarch64.sme.read.ver.vg2.nxv8i16(i32 1, i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint16x2_t test_svread_ver_za16_s16_vg2(uint32_t base) __arm_streaming __arm_in("za") { return svread_ver_za16_s16_vg2(1, base); @@ -489,35 +233,13 @@ svint16x2_t test_svread_ver_za16_s16_vg2(uint32_t base) __arm_streaming __arm_in // CHECK-LABEL: @test_svread_hor_za16_u16_vg4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.hor.vg4.nxv8i16(i32 1, i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z28test_svread_hor_za16_u16_vg4j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.hor.vg4.nxv8i16(i32 1, i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint16x4_t test_svread_hor_za16_u16_vg4(uint32_t base) __arm_streaming __arm_in("za") { return svread_hor_za16_u16_vg4(1, base); @@ -525,35 +247,13 @@ svuint16x4_t test_svread_hor_za16_u16_vg4(uint32_t base) __arm_streaming __arm_i // CHECK-LABEL: @test_svread_hor_za16_bf16_vg4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = 
alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.hor.vg4.nxv8bf16(i32 1, i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z29test_svread_hor_za16_bf16_vg4j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.hor.vg4.nxv8bf16(i32 1, i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svbfloat16x4_t test_svread_hor_za16_bf16_vg4(uint32_t base) __arm_streaming __arm_in("za") { return svread_hor_za16_bf16_vg4(1, base); @@ -561,35 +261,13 @@ svbfloat16x4_t test_svread_hor_za16_bf16_vg4(uint32_t base) __arm_streaming __ar // CHECK-LABEL: @test_svread_hor_za16_f16_vg4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.hor.vg4.nxv8f16(i32 1, i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr 
[[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z28test_svread_hor_za16_f16_vg4j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.hor.vg4.nxv8f16(i32 1, i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat16x4_t test_svread_hor_za16_f16_vg4(uint32_t base) __arm_streaming __arm_in("za") { return svread_hor_za16_f16_vg4(1, base); @@ -597,35 +275,13 @@ svfloat16x4_t test_svread_hor_za16_f16_vg4(uint32_t base) __arm_streaming __arm_ // CHECK-LABEL: @test_svread_hor_za16_s16_vg4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.hor.vg4.nxv8i16(i32 1, i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z28test_svread_hor_za16_s16_vg4j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.hor.vg4.nxv8i16(i32 1, i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } 
[[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint16x4_t test_svread_hor_za16_s16_vg4(uint32_t base) __arm_streaming __arm_in("za") { return svread_hor_za16_s16_vg4(1, base); @@ -633,35 +289,13 @@ svint16x4_t test_svread_hor_za16_s16_vg4(uint32_t base) __arm_streaming __arm_in // CHECK-LABEL: @test_svread_ver_za16_u16_vg4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.ver.vg4.nxv8i16(i32 1, i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z28test_svread_ver_za16_u16_vg4j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.ver.vg4.nxv8i16(i32 1, i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint16x4_t test_svread_ver_za16_u16_vg4(uint32_t base) __arm_streaming __arm_in("za") { return svread_ver_za16_u16_vg4(1, base); @@ -669,35 +303,13 @@ svuint16x4_t test_svread_ver_za16_u16_vg4(uint32_t base) __arm_streaming __arm_i // CHECK-LABEL: @test_svread_ver_za16_bf16_vg4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.ver.vg4.nxv8bf16(i32 1, i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call 
@llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z29test_svread_ver_za16_bf16_vg4j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.ver.vg4.nxv8bf16(i32 1, i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svbfloat16x4_t test_svread_ver_za16_bf16_vg4(uint32_t base) __arm_streaming __arm_in("za") { return svread_ver_za16_bf16_vg4(1, base); @@ -705,35 +317,13 @@ svbfloat16x4_t test_svread_ver_za16_bf16_vg4(uint32_t base) __arm_streaming __ar // CHECK-LABEL: @test_svread_ver_za16_f16_vg4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.ver.vg4.nxv8f16(i32 1, i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z28test_svread_ver_za16_f16_vg4j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // 
CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.ver.vg4.nxv8f16(i32 1, i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat16x4_t test_svread_ver_za16_f16_vg4(uint32_t base) __arm_streaming __arm_in("za") { return svread_ver_za16_f16_vg4(1, base); @@ -741,35 +331,13 @@ svfloat16x4_t test_svread_ver_za16_f16_vg4(uint32_t base) __arm_streaming __arm_ // CHECK-LABEL: @test_svread_ver_za16_s16_vg4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.ver.vg4.nxv8i16(i32 1, i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z28test_svread_ver_za16_s16_vg4j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.ver.vg4.nxv8i16(i32 1, i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 
16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint16x4_t test_svread_ver_za16_s16_vg4(uint32_t base) __arm_streaming __arm_in("za") { return svread_ver_za16_s16_vg4(1, base); @@ -777,27 +345,13 @@ svint16x4_t test_svread_ver_za16_s16_vg4(uint32_t base) __arm_streaming __arm_in // CHECK-LABEL: @test_svread_hor_za32_u32_vg2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.hor.vg2.nxv4i32(i32 3, i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z28test_svread_hor_za32_u32_vg2j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.hor.vg2.nxv4i32(i32 3, i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint32x2_t test_svread_hor_za32_u32_vg2(uint32_t base) __arm_streaming __arm_in("za") { return svread_hor_za32_u32_vg2(3, base); @@ -805,27 +359,13 @@ svuint32x2_t test_svread_hor_za32_u32_vg2(uint32_t base) __arm_streaming __arm_i // CHECK-LABEL: @test_svread_hor_za32_f32_vg2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.hor.vg2.nxv4f32(i32 3, i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z28test_svread_hor_za32_f32_vg2j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.hor.vg2.nxv4f32(i32 3, i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr 
[[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat32x2_t test_svread_hor_za32_f32_vg2(uint32_t base) __arm_streaming __arm_in("za") { return svread_hor_za32_f32_vg2(3, base); @@ -833,27 +373,13 @@ svfloat32x2_t test_svread_hor_za32_f32_vg2(uint32_t base) __arm_streaming __arm_ // CHECK-LABEL: @test_svread_hor_za32_s32_vg2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.hor.vg2.nxv4i32(i32 3, i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z28test_svread_hor_za32_s32_vg2j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.hor.vg2.nxv4i32(i32 3, i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint32x2_t test_svread_hor_za32_s32_vg2(uint32_t base) __arm_streaming __arm_in("za") { return svread_hor_za32_s32_vg2(3, base); @@ -861,27 +387,13 @@ svint32x2_t test_svread_hor_za32_s32_vg2(uint32_t base) __arm_streaming __arm_in // CHECK-LABEL: @test_svread_ver_za32_u32_vg2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.ver.vg2.nxv4i32(i32 3, i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z28test_svread_ver_za32_u32_vg2j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.ver.vg2.nxv4i32(i32 3, i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call 
@llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint32x2_t test_svread_ver_za32_u32_vg2(uint32_t base) __arm_streaming __arm_in("za") { return svread_ver_za32_u32_vg2(3, base); @@ -889,27 +401,13 @@ svuint32x2_t test_svread_ver_za32_u32_vg2(uint32_t base) __arm_streaming __arm_i // CHECK-LABEL: @test_svread_ver_za32_f32_vg2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.ver.vg2.nxv4f32(i32 3, i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z28test_svread_ver_za32_f32_vg2j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.ver.vg2.nxv4f32(i32 3, i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat32x2_t test_svread_ver_za32_f32_vg2(uint32_t base) __arm_streaming __arm_in("za") { return svread_ver_za32_f32_vg2(3, base); @@ -917,27 +415,13 @@ svfloat32x2_t test_svread_ver_za32_f32_vg2(uint32_t base) __arm_streaming __arm_ // CHECK-LABEL: @test_svread_ver_za32_s32_vg2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.ver.vg2.nxv4i32(i32 3, i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z28test_svread_ver_za32_s32_vg2j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.ver.vg2.nxv4i32(i32 3, i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: 
[[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint32x2_t test_svread_ver_za32_s32_vg2(uint32_t base) __arm_streaming __arm_in("za") { return svread_ver_za32_s32_vg2(3, base); @@ -945,35 +429,13 @@ svint32x2_t test_svread_ver_za32_s32_vg2(uint32_t base) __arm_streaming __arm_in // CHECK-LABEL: @test_svread_hor_za32_u32_vg4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.hor.vg4.nxv4i32(i32 3, i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z28test_svread_hor_za32_u32_vg4j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.hor.vg4.nxv4i32(i32 3, i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint32x4_t test_svread_hor_za32_u32_vg4(uint32_t base) __arm_streaming __arm_in("za") { return svread_hor_za32_u32_vg4(3, base); @@ -981,35 +443,13 @@ svuint32x4_t test_svread_hor_za32_u32_vg4(uint32_t base) __arm_streaming __arm_i // CHECK-LABEL: @test_svread_hor_za32_f32_vg4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.hor.vg4.nxv4f32(i32 3, i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call 
@llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z28test_svread_hor_za32_f32_vg4j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.hor.vg4.nxv4f32(i32 3, i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat32x4_t test_svread_hor_za32_f32_vg4(uint32_t base) __arm_streaming __arm_in("za") { return svread_hor_za32_f32_vg4(3, base); @@ -1017,35 +457,13 @@ svfloat32x4_t test_svread_hor_za32_f32_vg4(uint32_t base) __arm_streaming __arm_ // CHECK-LABEL: @test_svread_hor_za32_s32_vg4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.hor.vg4.nxv4i32(i32 3, i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z28test_svread_hor_za32_s32_vg4j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = 
tail call { , , , } @llvm.aarch64.sme.read.hor.vg4.nxv4i32(i32 3, i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint32x4_t test_svread_hor_za32_s32_vg4(uint32_t base) __arm_streaming __arm_in("za") { return svread_hor_za32_s32_vg4(3, base); @@ -1053,35 +471,13 @@ svint32x4_t test_svread_hor_za32_s32_vg4(uint32_t base) __arm_streaming __arm_in // CHECK-LABEL: @test_svread_ver_za32_u32_vg4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.ver.vg4.nxv4i32(i32 3, i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z28test_svread_ver_za32_u32_vg4j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.ver.vg4.nxv4i32(i32 3, i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , 
} [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint32x4_t test_svread_ver_za32_u32_vg4(uint32_t base) __arm_streaming __arm_in("za") { return svread_ver_za32_u32_vg4(3, base); @@ -1089,35 +485,13 @@ svuint32x4_t test_svread_ver_za32_u32_vg4(uint32_t base) __arm_streaming __arm_i // CHECK-LABEL: @test_svread_ver_za32_f32_vg4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.ver.vg4.nxv4f32(i32 3, i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z28test_svread_ver_za32_f32_vg4j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.ver.vg4.nxv4f32(i32 3, i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat32x4_t test_svread_ver_za32_f32_vg4(uint32_t base) __arm_streaming __arm_in("za") { return svread_ver_za32_f32_vg4(3, base); @@ -1125,35 +499,13 @@ svfloat32x4_t test_svread_ver_za32_f32_vg4(uint32_t base) __arm_streaming __arm_ // CHECK-LABEL: @test_svread_ver_za32_s32_vg4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.ver.vg4.nxv4i32(i32 3, i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: 
[[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z28test_svread_ver_za32_s32_vg4j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.ver.vg4.nxv4i32(i32 3, i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint32x4_t test_svread_ver_za32_s32_vg4(uint32_t base) __arm_streaming __arm_in("za") { return svread_ver_za32_s32_vg4(3, base); @@ -1161,27 +513,13 @@ svint32x4_t test_svread_ver_za32_s32_vg4(uint32_t base) __arm_streaming __arm_in // CHECK-LABEL: @test_svread_hor_za64_u64_vg2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.hor.vg2.nxv2i64(i32 7, i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z28test_svread_hor_za64_u64_vg2j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.hor.vg2.nxv2i64(i32 7, i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint64x2_t test_svread_hor_za64_u64_vg2(uint32_t 
base) __arm_streaming __arm_in("za") { return svread_hor_za64_u64_vg2(7, base); @@ -1189,27 +527,13 @@ svuint64x2_t test_svread_hor_za64_u64_vg2(uint32_t base) __arm_streaming __arm_i // CHECK-LABEL: @test_svread_hor_za64_f64_vg2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.hor.vg2.nxv2f64(i32 7, i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z28test_svread_hor_za64_f64_vg2j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.hor.vg2.nxv2f64(i32 7, i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat64x2_t test_svread_hor_za64_f64_vg2(uint32_t base) __arm_streaming __arm_in("za") { return svread_hor_za64_f64_vg2(7, base); @@ -1217,27 +541,13 @@ svfloat64x2_t test_svread_hor_za64_f64_vg2(uint32_t base) __arm_streaming __arm_ // CHECK-LABEL: @test_svread_hor_za64_s64_vg2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.hor.vg2.nxv2i64(i32 7, i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z28test_svread_hor_za64_s64_vg2j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.hor.vg2.nxv2i64(i32 7, i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// 
CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint64x2_t test_svread_hor_za64_s64_vg2(uint32_t base) __arm_streaming __arm_in("za") { return svread_hor_za64_s64_vg2(7, base); @@ -1245,27 +555,13 @@ svint64x2_t test_svread_hor_za64_s64_vg2(uint32_t base) __arm_streaming __arm_in // CHECK-LABEL: @test_svread_ver_za64_u64_vg2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.ver.vg2.nxv2i64(i32 7, i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z28test_svread_ver_za64_u64_vg2j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.ver.vg2.nxv2i64(i32 7, i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint64x2_t test_svread_ver_za64_u64_vg2(uint32_t base) __arm_streaming __arm_in("za") { return svread_ver_za64_u64_vg2(7, base); @@ -1273,55 +569,28 @@ svuint64x2_t test_svread_ver_za64_u64_vg2(uint32_t base) __arm_streaming __arm_i // CHECK-LABEL: @test_svread_ver_za64_f64_vg2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.ver.vg2.nxv2f64(i32 7, i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z28test_svread_ver_za64_f64_vg2j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.ver.vg2.nxv2f64(i32 7, i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = 
load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat64x2_t test_svread_ver_za64_f64_vg2(uint32_t base) __arm_streaming __arm_in("za") { return svread_ver_za64_f64_vg2(7, base); } +// // CHECK-LABEL: @test_svread_ver_za64_s64_vg2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.ver.vg2.nxv2i64(i32 7, i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z28test_svread_ver_za64_s64_vg2j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.ver.vg2.nxv2i64(i32 7, i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint64x2_t test_svread_ver_za64_s64_vg2(uint32_t base) __arm_streaming __arm_in("za") { return svread_ver_za64_s64_vg2(7, base); @@ -1329,35 +598,13 @@ svint64x2_t test_svread_ver_za64_s64_vg2(uint32_t base) __arm_streaming __arm_in // CHECK-LABEL: @test_svread_hor_za64_u64_vg4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.hor.vg4.nxv2i64(i32 7, i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z28test_svread_hor_za64_u64_vg4j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.hor.vg4.nxv2i64(i32 7, i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// 
CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint64x4_t test_svread_hor_za64_u64_vg4(uint32_t base) __arm_streaming __arm_in("za") { return svread_hor_za64_u64_vg4(7, base); @@ -1365,35 +612,13 @@ svuint64x4_t test_svread_hor_za64_u64_vg4(uint32_t base) __arm_streaming __arm_i // CHECK-LABEL: @test_svread_hor_za64_f64_vg4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.hor.vg4.nxv2f64(i32 7, i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z28test_svread_hor_za64_f64_vg4j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.hor.vg4.nxv2f64(i32 7, i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat64x4_t test_svread_hor_za64_f64_vg4(uint32_t base) __arm_streaming __arm_in("za") { return 
svread_hor_za64_f64_vg4(7, base); @@ -1401,35 +626,13 @@ svfloat64x4_t test_svread_hor_za64_f64_vg4(uint32_t base) __arm_streaming __arm_ // CHECK-LABEL: @test_svread_hor_za64_s64_vg4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.hor.vg4.nxv2i64(i32 7, i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z28test_svread_hor_za64_s64_vg4j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.hor.vg4.nxv2i64(i32 7, i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint64x4_t test_svread_hor_za64_s64_vg4(uint32_t base) __arm_streaming __arm_in("za") { return svread_hor_za64_s64_vg4(7, base); @@ -1437,35 +640,13 @@ svint64x4_t test_svread_hor_za64_s64_vg4(uint32_t base) __arm_streaming __arm_in // CHECK-LABEL: @test_svread_ver_za64_u64_vg4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.ver.vg4.nxv2i64(i32 7, i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: 
[[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z28test_svread_ver_za64_u64_vg4j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.ver.vg4.nxv2i64(i32 7, i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint64x4_t test_svread_ver_za64_u64_vg4(uint32_t base) __arm_streaming __arm_in("za") { return svread_ver_za64_u64_vg4(7, base); @@ -1473,35 +654,13 @@ svuint64x4_t test_svread_ver_za64_u64_vg4(uint32_t base) __arm_streaming __arm_i // CHECK-LABEL: @test_svread_ver_za64_f64_vg4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.ver.vg4.nxv2f64(i32 7, i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z28test_svread_ver_za64_f64_vg4j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.ver.vg4.nxv2f64(i32 7, i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } 
[[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat64x4_t test_svread_ver_za64_f64_vg4(uint32_t base) __arm_streaming __arm_in("za") { return svread_ver_za64_f64_vg4(7, base); @@ -1509,35 +668,13 @@ svfloat64x4_t test_svread_ver_za64_f64_vg4(uint32_t base) __arm_streaming __arm_ // CHECK-LABEL: @test_svread_ver_za64_s64_vg4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.ver.vg4.nxv2i64(i32 7, i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z28test_svread_ver_za64_s64_vg4j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.ver.vg4.nxv2i64(i32 7, i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint64x4_t test_svread_ver_za64_s64_vg4(uint32_t base) __arm_streaming __arm_in("za") { return svread_ver_za64_s64_vg4(7, base); @@ -1545,27 +682,13 @@ svint64x4_t test_svread_ver_za64_s64_vg4(uint32_t base) __arm_streaming __arm_in // CHECK-LABEL: @test_svread_za8_s8_vg1x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.vg1x2.nxv16i8(i32 
[[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svread_za8_s8_vg1x2j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.vg1x2.nxv16i8(i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint8x2_t test_svread_za8_s8_vg1x2(uint32_t base) __arm_streaming __arm_in("za") { return svread_za8_s8_vg1x2(base); @@ -1573,27 +696,13 @@ svint8x2_t test_svread_za8_s8_vg1x2(uint32_t base) __arm_streaming __arm_in("za" // CHECK-LABEL: @test_svread_za8_u8_vg1x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.vg1x2.nxv16i8(i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svread_za8_u8_vg1x2j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.vg1x2.nxv16i8(i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint8x2_t test_svread_za8_u8_vg1x2(uint32_t base) __arm_streaming __arm_in("za") { return svread_za8_u8_vg1x2(base); @@ -1601,56 +710,27 @@ svuint8x2_t test_svread_za8_u8_vg1x2(uint32_t base) __arm_streaming __arm_in("za // CHECK-LABEL: @test_svread_za16_s16_vg1x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } 
@llvm.aarch64.sme.read.vg1x2.nxv8i16(i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z26test_svread_za16_s16_vg1x2j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.vg1x2.nxv8i16(i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint16x2_t test_svread_za16_s16_vg1x2(uint32_t base) __arm_streaming __arm_in("za") { return svread_za16_s16_vg1x2(base); } -// // CHECK-LABEL: @test_svread_za16_u16_vg1x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.vg1x2.nxv8i16(i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z26test_svread_za16_u16_vg1x2j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.vg1x2.nxv8i16(i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint16x2_t test_svread_za16_u16_vg1x2(uint32_t base) __arm_streaming __arm_in("za") { return svread_za16_u16_vg1x2(base); @@ -1658,27 +738,13 @@ svuint16x2_t test_svread_za16_u16_vg1x2(uint32_t base) __arm_streaming __arm_in( // CHECK-LABEL: @test_svread_za16_bf16_vg1x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.vg1x2.nxv8bf16(i32 [[BASE:%.*]]) -// 
CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z27test_svread_za16_bf16_vg1x2j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.vg1x2.nxv8bf16(i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svbfloat16x2_t test_svread_za16_bf16_vg1x2(uint32_t base) __arm_streaming __arm_in("za") { return svread_za16_bf16_vg1x2(base); @@ -1686,27 +752,13 @@ svbfloat16x2_t test_svread_za16_bf16_vg1x2(uint32_t base) __arm_streaming __arm_ // CHECK-LABEL: @test_svread_za16_f16_vg1x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.vg1x2.nxv8f16(i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z26test_svread_za16_f16_vg1x2j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.vg1x2.nxv8f16(i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat16x2_t test_svread_za16_f16_vg1x2(uint32_t base) __arm_streaming __arm_in("za") { return svread_za16_f16_vg1x2(base); @@ -1714,27 +766,13 @@ svfloat16x2_t test_svread_za16_f16_vg1x2(uint32_t base) __arm_streaming __arm_in // CHECK-LABEL: @test_svread_za32_s32_vg1x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } 
@llvm.aarch64.sme.read.vg1x2.nxv4i32(i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z26test_svread_za32_s32_vg1x2j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.vg1x2.nxv4i32(i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint32x2_t test_svread_za32_s32_vg1x2(uint32_t base) __arm_streaming __arm_in("za") { return svread_za32_s32_vg1x2(base); @@ -1742,27 +780,13 @@ svint32x2_t test_svread_za32_s32_vg1x2(uint32_t base) __arm_streaming __arm_in(" // CHECK-LABEL: @test_svread_za32_u32_vg1x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.vg1x2.nxv4i32(i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z26test_svread_za32_u32_vg1x2j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.vg1x2.nxv4i32(i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint32x2_t test_svread_za32_u32_vg1x2(uint32_t base) __arm_streaming __arm_in("za") { return svread_za32_u32_vg1x2(base); @@ -1770,27 +794,13 @@ svuint32x2_t test_svread_za32_u32_vg1x2(uint32_t base) __arm_streaming __arm_in( // CHECK-LABEL: @test_svread_za32_f32_vg1x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: 
[[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.vg1x2.nxv4f32(i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z26test_svread_za32_f32_vg1x2j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.vg1x2.nxv4f32(i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat32x2_t test_svread_za32_f32_vg1x2(uint32_t base) __arm_streaming __arm_in("za") { return svread_za32_f32_vg1x2(base); @@ -1798,27 +808,13 @@ svfloat32x2_t test_svread_za32_f32_vg1x2(uint32_t base) __arm_streaming __arm_in // CHECK-LABEL: @test_svread_za64_u64_vg1x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.vg1x2.nxv2i64(i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z26test_svread_za64_u64_vg1x2j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.vg1x2.nxv2i64(i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint64x2_t test_svread_za64_u64_vg1x2(uint32_t base) __arm_streaming __arm_in("za") { return svread_za64_u64_vg1x2(base); @@ -1826,27 +822,13 @@ svuint64x2_t test_svread_za64_u64_vg1x2(uint32_t base) __arm_streaming __arm_in( // CHECK-LABEL: @test_svread_za64_f64_vg1x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, 
align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.vg1x2.nxv2f64(i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z26test_svread_za64_f64_vg1x2j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.vg1x2.nxv2f64(i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat64x2_t test_svread_za64_f64_vg1x2(uint32_t base) __arm_streaming __arm_in("za") { return svread_za64_f64_vg1x2(base); @@ -1854,27 +836,13 @@ svfloat64x2_t test_svread_za64_f64_vg1x2(uint32_t base) __arm_streaming __arm_in // CHECK-LABEL: @test_svread_za64_s64_vg1x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.vg1x2.nxv2i64(i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z26test_svread_za64_s64_vg1x2j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.vg1x2.nxv2i64(i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint64x2_t test_svread_za64_s64_vg1x2(uint32_t base) __arm_streaming __arm_in("za") { return svread_za64_s64_vg1x2(base); @@ -1882,35 +850,13 @@ svint64x2_t test_svread_za64_s64_vg1x2(uint32_t base) __arm_streaming __arm_in(" // CHECK-LABEL: @test_svread_za8_s8_vg1x4( // CHECK-NEXT: entry: -// CHECK-NEXT: 
[[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.vg1x4.nxv16i8(i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svread_za8_s8_vg1x4j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.vg1x4.nxv16i8(i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint8x4_t test_svread_za8_s8_vg1x4(uint32_t base) __arm_streaming __arm_in("za") { return svread_za8_s8_vg1x4(base); @@ -1918,35 +864,13 @@ svint8x4_t test_svread_za8_s8_vg1x4(uint32_t base) __arm_streaming __arm_in("za" // CHECK-LABEL: @test_svread_za8_u8_vg1x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.vg1x4.nxv16i8(i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// 
CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svread_za8_u8_vg1x4j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.vg1x4.nxv16i8(i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint8x4_t test_svread_za8_u8_vg1x4(uint32_t base) __arm_streaming __arm_in("za") { return svread_za8_u8_vg1x4(base); @@ -1954,35 +878,13 @@ svuint8x4_t test_svread_za8_u8_vg1x4(uint32_t base) __arm_streaming __arm_in("za // CHECK-LABEL: @test_svread_za16_s16_vg1x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.vg1x4.nxv8i16(i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z26test_svread_za16_s16_vg1x4j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.vg1x4.nxv8i16(i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 
24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint16x4_t test_svread_za16_s16_vg1x4(uint32_t base) __arm_streaming __arm_in("za") { return svread_za16_s16_vg1x4(base); @@ -1990,35 +892,13 @@ svint16x4_t test_svread_za16_s16_vg1x4(uint32_t base) __arm_streaming __arm_in(" // CHECK-LABEL: @test_svread_za16_u16_vg1x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.vg1x4.nxv8i16(i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z26test_svread_za16_u16_vg1x4j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.vg1x4.nxv8i16(i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint16x4_t test_svread_za16_u16_vg1x4(uint32_t base) __arm_streaming __arm_in("za") { return svread_za16_u16_vg1x4(base); @@ -2026,35 +906,13 @@ svuint16x4_t test_svread_za16_u16_vg1x4(uint32_t base) __arm_streaming __arm_in( // CHECK-LABEL: @test_svread_za16_bf16_vg1x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.vg1x4.nxv8bf16(i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call 
@llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z27test_svread_za16_bf16_vg1x4j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.vg1x4.nxv8bf16(i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svbfloat16x4_t test_svread_za16_bf16_vg1x4(uint32_t base) __arm_streaming __arm_in("za") { return svread_za16_bf16_vg1x4(base); @@ -2062,35 +920,13 @@ svbfloat16x4_t test_svread_za16_bf16_vg1x4(uint32_t base) __arm_streaming __arm_ // CHECK-LABEL: @test_svread_za16_f16_vg1x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.vg1x4.nxv8f16(i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z26test_svread_za16_f16_vg1x4j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.vg1x4.nxv8f16(i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] 
= tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat16x4_t test_svread_za16_f16_vg1x4(uint32_t base) __arm_streaming __arm_in("za") { return svread_za16_f16_vg1x4(base); @@ -2098,35 +934,13 @@ svfloat16x4_t test_svread_za16_f16_vg1x4(uint32_t base) __arm_streaming __arm_in // CHECK-LABEL: @test_svread_za32_s32_vg1x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.vg1x4.nxv4i32(i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z26test_svread_za32_s32_vg1x4j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.vg1x4.nxv4i32(i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint32x4_t test_svread_za32_s32_vg1x4(uint32_t base) __arm_streaming __arm_in("za") { return svread_za32_s32_vg1x4(base); @@ -2134,35 +948,13 @@ 
svint32x4_t test_svread_za32_s32_vg1x4(uint32_t base) __arm_streaming __arm_in(" // CHECK-LABEL: @test_svread_za32_u32_vg1x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.vg1x4.nxv4i32(i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z26test_svread_za32_u32_vg1x4j( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.vg1x4.nxv4i32(i32 [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint32x4_t test_svread_za32_u32_vg1x4(uint32_t base) __arm_streaming __arm_in("za") { return svread_za32_u32_vg1x4(base); @@ -2170,35 +962,13 @@ svuint32x4_t test_svread_za32_u32_vg1x4(uint32_t base) __arm_streaming __arm_in( // CHECK-LABEL: @test_svread_za32_f32_vg1x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.vg1x4.nxv4f32(i32 [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) 
-// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16
-// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
-// CHECK-NEXT: ret { , , , } [[TMP9]]
+// CHECK-NEXT: ret { , , , } [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z26test_svread_za32_f32_vg1x4j(
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16
 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.vg1x4.nxv4f32(i32 [[BASE:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4)
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2
-// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8)
-// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
-// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12)
-// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: ret { , , , } [[TMP9]]
+// CPP-CHECK-NEXT: ret { , , , } [[TMP0]]
 //
 svfloat32x4_t test_svread_za32_f32_vg1x4(uint32_t base) __arm_streaming __arm_in("za") {
   return svread_za32_f32_vg1x4(base);
@@ -2206,35 +976,13 @@ svfloat32x4_t test_svread_za32_f32_vg1x4(uint32_t base) __arm_streaming __arm_in
 // CHECK-LABEL: @test_svread_za64_u64_vg1x4(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16
 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.vg1x4.nxv2i64(i32 [[BASE:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1
-// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2)
-// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2
-// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4)
-// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
-// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6)
-// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16
-// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
-// CHECK-NEXT: ret { , , , } [[TMP9]]
+// CHECK-NEXT: ret { , , , } [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z26test_svread_za64_u64_vg1x4j(
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16
 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.vg1x4.nxv2i64(i32 [[BASE:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2)
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2
-// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4)
-// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
-// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6)
-// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: ret { , , , } [[TMP9]]
+// CPP-CHECK-NEXT: ret { , , , } [[TMP0]]
 //
 svuint64x4_t test_svread_za64_u64_vg1x4(uint32_t base) __arm_streaming __arm_in("za") {
   return svread_za64_u64_vg1x4(base);
@@ -2242,35 +990,13 @@ svuint64x4_t test_svread_za64_u64_vg1x4(uint32_t base) __arm_streaming __arm_in(
 // CHECK-LABEL: @test_svread_za64_f64_vg1x4(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16
 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.vg1x4.nxv2f64(i32 [[BASE:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1
-// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2)
-// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2
-// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 4)
-// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
-// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6)
-// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16
-// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
-// CHECK-NEXT: ret { , , , } [[TMP9]]
+// CHECK-NEXT: ret { , , , } [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z26test_svread_za64_f64_vg1x4j(
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16
 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.vg1x4.nxv2f64(i32 [[BASE:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2)
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2
-// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 4)
-// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
-// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6)
-// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: ret { , , , } [[TMP9]]
+// CPP-CHECK-NEXT: ret { , , , } [[TMP0]]
 //
 svfloat64x4_t test_svread_za64_f64_vg1x4(uint32_t base) __arm_streaming __arm_in("za") {
   return svread_za64_f64_vg1x4(base);
@@ -2278,35 +1004,13 @@ svfloat64x4_t test_svread_za64_f64_vg1x4(uint32_t base) __arm_streaming __arm_in
 // CHECK-LABEL: @test_svread_za64_s64_vg1x4(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16
 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.vg1x4.nxv2i64(i32 [[BASE:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1
-// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2)
-// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2
-// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4)
-// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
-// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6)
-// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16
-// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
-// CHECK-NEXT: ret { , , , } [[TMP9]]
+// CHECK-NEXT: ret { , , , } [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z26test_svread_za64_s64_vg1x4j(
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16
 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.vg1x4.nxv2i64(i32 [[BASE:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2)
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2
-// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4)
-// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
-// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6)
-// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: ret { , , , } [[TMP9]]
+// CPP-CHECK-NEXT: ret { , , , } [[TMP0]]
 //
 svint64x4_t test_svread_za64_s64_vg1x4(uint32_t base) __arm_streaming __arm_in("za") {
   return svread_za64_s64_vg1x4(base);
diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_sqdmulh.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_sqdmulh.c
index 26804866a7563..5ff801666df88 100644
--- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_sqdmulh.c
+++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_sqdmulh.c
@@ -19,27 +19,13 @@
 // CHECK-LABEL: @test_svqdmulh_single_s8_x2(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sqdmulh.single.vgx2.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
-// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16)
-// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
-// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
-// CHECK-NEXT: ret { , } [[TMP5]]
+// CHECK-NEXT: ret { , } [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z26test_svqdmulh_single_s8_x210svint8x2_tu10__SVInt8_t(
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sqdmulh.single.vgx2.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16)
-// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: ret { , } [[TMP5]]
+// CPP-CHECK-NEXT: ret { , } [[TMP0]]
 //
 svint8x2_t test_svqdmulh_single_s8_x2(svint8x2_t zdn, svint8_t zm) __arm_streaming {
   return SVE_ACLE_FUNC(svqdmulh,_single_s8_x2,,,)(zdn, zm);
@@ -47,27 +33,13 @@ svint8x2_t test_svqdmulh_single_s8_x2(svint8x2_t zdn, svint8_t zm) __arm_streami
 // CHECK-LABEL: @test_svqdmulh_single_s16_x2(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sqdmulh.single.vgx2.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
-// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8)
-// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
-// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
-// CHECK-NEXT: ret { , } [[TMP5]]
+// CHECK-NEXT: ret { , } [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z27test_svqdmulh_single_s16_x211svint16x2_tu11__SVInt16_t(
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sqdmulh.single.vgx2.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8)
-// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: ret { , } [[TMP5]]
+// CPP-CHECK-NEXT: ret { , } [[TMP0]]
 //
 svint16x2_t test_svqdmulh_single_s16_x2(svint16x2_t zdn, svint16_t zm) __arm_streaming {
   return SVE_ACLE_FUNC(svqdmulh,_single_s16_x2,,,)(zdn, zm);
@@ -75,27 +47,13 @@ svint16x2_t test_svqdmulh_single_s16_x2(svint16x2_t zdn, svint16_t zm) __arm_str
 // CHECK-LABEL: @test_svqdmulh_single_s32_x2(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sqdmulh.single.vgx2.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
-// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4)
-// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
-// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
-// CHECK-NEXT: ret { , } [[TMP5]]
+// CHECK-NEXT: ret { , } [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z27test_svqdmulh_single_s32_x211svint32x2_tu11__SVInt32_t(
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sqdmulh.single.vgx2.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4)
-// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: ret { , } [[TMP5]]
+// CPP-CHECK-NEXT: ret { , } [[TMP0]]
 //
 svint32x2_t test_svqdmulh_single_s32_x2(svint32x2_t zdn, svint32_t zm) __arm_streaming {
   return SVE_ACLE_FUNC(svqdmulh,_single_s32_x2,,,)(zdn, zm);
@@ -103,27 +61,13 @@ svint32x2_t test_svqdmulh_single_s32_x2(svint32x2_t zdn, svint32_t zm) __arm_str
 // CHECK-LABEL: @test_svqdmulh_single_s64_x2(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sqdmulh.single.vgx2.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
-// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2)
-// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
-// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
-// CHECK-NEXT: ret { , } [[TMP5]]
+// CHECK-NEXT: ret { , } [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z27test_svqdmulh_single_s64_x211svint64x2_tu11__SVInt64_t(
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sqdmulh.single.vgx2.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2)
-// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: ret { , } [[TMP5]]
+// CPP-CHECK-NEXT: ret { , } [[TMP0]]
 //
 svint64x2_t test_svqdmulh_single_s64_x2(svint64x2_t zdn, svint64_t zm) __arm_streaming {
   return SVE_ACLE_FUNC(svqdmulh,_single_s64_x2,,,)(zdn, zm);
@@ -133,35 +77,13 @@ svint64x2_t test_svqdmulh_single_s64_x2(svint64x2_t zdn, svint64_t zm) __arm_str
 // CHECK-LABEL: @test_svqdmulh_single_s8_x4(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16
 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sqdmulh.single.vgx4.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1
-// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16)
-// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2
-// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32)
-// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
-// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48)
-// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16
-// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
-// CHECK-NEXT: ret { , , , } [[TMP9]]
+// CHECK-NEXT: ret { , , , } [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z26test_svqdmulh_single_s8_x410svint8x4_tu10__SVInt8_t(
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16
 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sqdmulh.single.vgx4.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16)
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2
-// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32)
-// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
-// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48)
-// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: ret { , , , } [[TMP9]]
+// CPP-CHECK-NEXT: ret { , , , } [[TMP0]]
 //
 svint8x4_t test_svqdmulh_single_s8_x4(svint8x4_t zdn, svint8_t zm) __arm_streaming {
   return SVE_ACLE_FUNC(svqdmulh,_single_s8_x4,,,)(zdn, zm);
@@ -169,35 +91,13 @@ svint8x4_t test_svqdmulh_single_s8_x4(svint8x4_t zdn, svint8_t zm) __arm_streami
 // CHECK-LABEL: @test_svqdmulh_single_s16_x4(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16
 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sqdmulh.single.vgx4.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1
-// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8)
-// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2
-// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16)
-// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
-// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24)
-// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16
-// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
-// CHECK-NEXT: ret { , , , } [[TMP9]]
+// CHECK-NEXT: ret { , , , } [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z27test_svqdmulh_single_s16_x411svint16x4_tu11__SVInt16_t(
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16
 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sqdmulh.single.vgx4.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8)
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2
-// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16)
-// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
-// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24)
-// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: ret { , , , } [[TMP9]]
+// CPP-CHECK-NEXT: ret { , , , } [[TMP0]]
 //
 svint16x4_t test_svqdmulh_single_s16_x4(svint16x4_t zdn, svint16_t zm) __arm_streaming {
   return SVE_ACLE_FUNC(svqdmulh,_single_s16_x4,,,)(zdn, zm);
@@ -205,35 +105,13 @@ svint16x4_t test_svqdmulh_single_s16_x4(svint16x4_t zdn, svint16_t zm) __arm_str
 // CHECK-LABEL: @test_svqdmulh_single_s32_x4(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16
 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sqdmulh.single.vgx4.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1
-// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4)
-// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2
-// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8)
-// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
-// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12)
-// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16
-// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
-// CHECK-NEXT: ret { , , , } [[TMP9]]
+// CHECK-NEXT: ret { , , , } [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z27test_svqdmulh_single_s32_x411svint32x4_tu11__SVInt32_t(
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16
 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sqdmulh.single.vgx4.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4)
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2
-// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8)
-// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
-// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12)
-// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: ret { , , , } [[TMP9]]
+// CPP-CHECK-NEXT: ret { , , , } [[TMP0]]
 //
 svint32x4_t test_svqdmulh_single_s32_x4(svint32x4_t zdn, svint32_t zm) __arm_streaming {
   return SVE_ACLE_FUNC(svqdmulh,_single_s32_x4,,,)(zdn, zm);
@@ -241,35 +119,13 @@ svint32x4_t test_svqdmulh_single_s32_x4(svint32x4_t zdn, svint32_t zm) __arm_str
 // CHECK-LABEL: @test_svqdmulh_single_s64_x4(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16
 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sqdmulh.single.vgx4.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1
-// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2)
-// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2
-// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4)
-// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
-// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6)
-// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16
-// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
-// CHECK-NEXT: ret { , , , } [[TMP9]]
+// CHECK-NEXT: ret { , , , } [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z27test_svqdmulh_single_s64_x411svint64x4_tu11__SVInt64_t(
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16
 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sqdmulh.single.vgx4.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2)
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2
-// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4)
-// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
-// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6)
-// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: ret { , , , } [[TMP9]]
+// CPP-CHECK-NEXT: ret { , , , } [[TMP0]]
 //
 svint64x4_t test_svqdmulh_single_s64_x4(svint64x4_t zdn, svint64_t zm) __arm_streaming {
   return SVE_ACLE_FUNC(svqdmulh,_single_s64_x4,,,)(zdn, zm);
@@ -279,27 +135,13 @@ svint64x4_t test_svqdmulh_single_s64_x4(svint64x4_t zdn, svint64_t zm) __arm_str
CHECK-LABEL: @test_svqdmulh_s8_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sqdmulh.vgx2.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z19test_svqdmulh_s8_x210svint8x2_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sqdmulh.vgx2.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint8x2_t test_svqdmulh_s8_x2(svint8x2_t zdn, svint8x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svqdmulh,_s8_x2,,,)(zdn, zm); @@ -307,27 +149,13 @@ svint8x2_t test_svqdmulh_s8_x2(svint8x2_t zdn, svint8x2_t zm) __arm_streaming { // CHECK-LABEL: @test_svqdmulh_s16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sqdmulh.vgx2.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z20test_svqdmulh_s16_x211svint16x2_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sqdmulh.vgx2.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// 
CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint16x2_t test_svqdmulh_s16_x2(svint16x2_t zdn, svint16x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svqdmulh,_s16_x2,,,)(zdn, zm); @@ -335,27 +163,13 @@ svint16x2_t test_svqdmulh_s16_x2(svint16x2_t zdn, svint16x2_t zm) __arm_streamin // CHECK-LABEL: @test_svqdmulh_s32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sqdmulh.vgx2.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z20test_svqdmulh_s32_x211svint32x2_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sqdmulh.vgx2.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint32x2_t test_svqdmulh_s32_x2(svint32x2_t zdn, svint32x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svqdmulh,_s32_x2,,,)(zdn, zm); @@ -363,27 +177,13 @@ svint32x2_t test_svqdmulh_s32_x2(svint32x2_t zdn, svint32x2_t zm) __arm_streamin // CHECK-LABEL: @test_svqdmulh_s64_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sqdmulh.vgx2.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z20test_svqdmulh_s64_x211svint64x2_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sqdmulh.vgx2.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], 
i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint64x2_t test_svqdmulh_s64_x2(svint64x2_t zdn, svint64x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svqdmulh,_s64_x2,,,)(zdn, zm); @@ -393,35 +193,13 @@ svint64x2_t test_svqdmulh_s64_x2(svint64x2_t zdn, svint64x2_t zm) __arm_streamin // CHECK-LABEL: @test_svqdmulh_s8_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sqdmulh.vgx4.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z19test_svqdmulh_s8_x410svint8x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sqdmulh.vgx4.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint8x4_t test_svqdmulh_s8_x4(svint8x4_t zdn, svint8x4_t zm) __arm_streaming { return SVE_ACLE_FUNC(svqdmulh,_s8_x4,,,)(zdn, zm); @@ -429,35 +207,13 @@ svint8x4_t test_svqdmulh_s8_x4(svint8x4_t zdn, svint8x4_t zm) __arm_streaming { // CHECK-LABEL: @test_svqdmulh_s16_x4( // CHECK-NEXT: entry: -// 
CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sqdmulh.vgx4.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z20test_svqdmulh_s16_x411svint16x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sqdmulh.vgx4.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint16x4_t test_svqdmulh_s16_x4(svint16x4_t zdn, svint16x4_t zm) __arm_streaming { return SVE_ACLE_FUNC(svqdmulh,_s16_x4,,,)(zdn, zm); @@ -465,35 +221,13 @@ svint16x4_t test_svqdmulh_s16_x4(svint16x4_t zdn, svint16x4_t zm) __arm_streamin // CHECK-LABEL: @test_svqdmulh_s32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sqdmulh.vgx4.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = 
extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z20test_svqdmulh_s32_x411svint32x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sqdmulh.vgx4.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint32x4_t test_svqdmulh_s32_x4(svint32x4_t zdn, svint32x4_t zm) __arm_streaming { return SVE_ACLE_FUNC(svqdmulh,_s32_x4,,,)(zdn, zm); @@ -501,35 +235,13 @@ svint32x4_t test_svqdmulh_s32_x4(svint32x4_t zdn, svint32x4_t zm) __arm_streamin // CHECK-LABEL: @test_svqdmulh_s64_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sqdmulh.vgx4.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z20test_svqdmulh_s64_x411svint64x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // 
CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sqdmulh.vgx4.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint64x4_t test_svqdmulh_s64_x4(svint64x4_t zdn, svint64x4_t zm) __arm_streaming { return SVE_ACLE_FUNC(svqdmulh,_s64_x4,,,)(zdn, zm); diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_unpkx2.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_unpkx2.c index fa66c4ff19014..d3b09f071c58f 100644 --- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_unpkx2.c +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_unpkx2.c @@ -19,27 +19,13 @@ // CHECK-LABEL: @test_svunpk_s16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sunpk.x2.nxv8i16( [[ZN:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svunpk_s16_x2u10__SVInt8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sunpk.x2.nxv8i16( [[ZN:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint16x2_t test_svunpk_s16_x2(svint8_t zn) __arm_streaming { return SVE_ACLE_FUNC(svunpk_s16,_s8_x2)(zn); @@ -47,27 +33,13 @@ svint16x2_t test_svunpk_s16_x2(svint8_t zn) __arm_streaming { // CHECK-LABEL: @test_svunpk_u16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail 
call { , } @llvm.aarch64.sve.uunpk.x2.nxv8i16( [[ZN:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svunpk_u16_x2u11__SVUint8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uunpk.x2.nxv8i16( [[ZN:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint16x2_t test_svunpk_u16_x2(svuint8_t zn) __arm_streaming { return SVE_ACLE_FUNC(svunpk_u16,_u8_x2)(zn); @@ -75,27 +47,13 @@ svuint16x2_t test_svunpk_u16_x2(svuint8_t zn) __arm_streaming { // CHECK-LABEL: @test_svunpk_s32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sunpk.x2.nxv4i32( [[ZN:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svunpk_s32_x2u11__SVInt16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sunpk.x2.nxv4i32( [[ZN:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint32x2_t test_svunpk_s32_x2(svint16_t zn) __arm_streaming { return SVE_ACLE_FUNC(svunpk_s32,_s16_x2)(zn); @@ -103,27 +61,13 @@ svint32x2_t test_svunpk_s32_x2(svint16_t zn) __arm_streaming { // CHECK-LABEL: @test_svunpk_u32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uunpk.x2.nxv4i32( [[ZN:%.*]]) -// CHECK-NEXT: 
[[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svunpk_u32_x2u12__SVUint16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uunpk.x2.nxv4i32( [[ZN:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint32x2_t test_svunpk_u32_x2(svuint16_t zn) __arm_streaming { return SVE_ACLE_FUNC(svunpk_u32,_u16_x2)(zn); @@ -131,27 +75,13 @@ svuint32x2_t test_svunpk_u32_x2(svuint16_t zn) __arm_streaming { // CHECK-LABEL: @test_svunpk_s64_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sunpk.x2.nxv2i64( [[ZN:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svunpk_s64_x2u11__SVInt32_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sunpk.x2.nxv2i64( [[ZN:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint64x2_t test_svunpk_s64_x2(svint32_t zn) __arm_streaming { return SVE_ACLE_FUNC(svunpk_s64,_s32_x2)(zn); @@ -159,27 +89,13 @@ svint64x2_t test_svunpk_s64_x2(svint32_t zn) __arm_streaming { // CHECK-LABEL: @test_svunpk_u64_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uunpk.x2.nxv2i64( [[ZN:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] 
= tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svunpk_u64_x2u12__SVUint32_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uunpk.x2.nxv2i64( [[ZN:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint64x2_t test_svunpk_u64_x2(svuint32_t zn) __arm_streaming { return SVE_ACLE_FUNC(svunpk_u64,_u32_x2)(zn); diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_unpkx4.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_unpkx4.c index 61718f0984ef3..45bc83eac7339 100644 --- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_unpkx4.c +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_unpkx4.c @@ -19,35 +19,13 @@ // CHECK-LABEL: @test_svunpk_s16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sunpk.x4.nxv8i16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svunpk_s16_x410svint8x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sunpk.x4.nxv8i16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// 
-// CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 32 x i16> @llvm.vector.insert.nxv32i16.nxv8i16(<vscale x 32 x i16> [[TMP4]], <vscale x 8 x i16> [[TMP5]], i64 16)
-// CPP-CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], 3
-// CPP-CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 32 x i16> @llvm.vector.insert.nxv32i16.nxv8i16(<vscale x 32 x i16> [[TMP6]], <vscale x 8 x i16> [[TMP7]], i64 24)
-// CPP-CHECK-NEXT:    store <vscale x 32 x i16> [[TMP8]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT:    [[TMP9:%.*]] = load { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT:    ret { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP9]]
+// CPP-CHECK-NEXT:    ret { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]]
 //
 svint16x4_t test_svunpk_s16_x4(svint8x2_t zn) __arm_streaming {
   return SVE_ACLE_FUNC(svunpk_s16,_s8_x4)(zn);
@@ -55,35 +33,13 @@ svint16x4_t test_svunpk_s16_x4(svint8x2_t zn) __arm_streaming {
 
 // CHECK-LABEL: @test_svunpk_u16_x4(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> }, align 16
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.uunpk.x4.nxv8i16(<vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], 0
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 32 x i16> @llvm.vector.insert.nxv32i16.nxv8i16(<vscale x 32 x i16> poison, <vscale x 8 x i16> [[TMP1]], i64 0)
-// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], 1
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 32 x i16> @llvm.vector.insert.nxv32i16.nxv8i16(<vscale x 32 x i16> [[TMP2]], <vscale x 8 x i16> [[TMP3]], i64 8)
-// CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], 2
-// CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 32 x i16> @llvm.vector.insert.nxv32i16.nxv8i16(<vscale x 32 x i16> [[TMP4]], <vscale x 8 x i16> [[TMP5]], i64 16)
-// CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], 3
-// CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 32 x i16> @llvm.vector.insert.nxv32i16.nxv8i16(<vscale x 32 x i16> [[TMP6]], <vscale x 8 x i16> [[TMP7]], i64 24)
-// CHECK-NEXT:    store <vscale x 32 x i16> [[TMP8]], ptr [[RETVAL]], align 16
-// CHECK-NEXT:    [[TMP9:%.*]] = load { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> }, ptr [[RETVAL]], align 16
-// CHECK-NEXT:    ret { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP9]]
+// CHECK-NEXT:    ret { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z18test_svunpk_u16_x411svuint8x2_t(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> }, align 16
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.uunpk.x4.nxv8i16(<vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], 0
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 32 x i16> @llvm.vector.insert.nxv32i16.nxv8i16(<vscale x 32 x i16> poison, <vscale x 8 x i16> [[TMP1]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], 1
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 32 x i16> @llvm.vector.insert.nxv32i16.nxv8i16(<vscale x 32 x i16> [[TMP2]], <vscale x 8 x i16> [[TMP3]], i64 8)
-// CPP-CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], 2
-// CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 32 x i16> @llvm.vector.insert.nxv32i16.nxv8i16(<vscale x 32 x i16> [[TMP4]], <vscale x 8 x i16> [[TMP5]], i64 16)
-// CPP-CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], 3
-// CPP-CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 32 x i16> @llvm.vector.insert.nxv32i16.nxv8i16(<vscale x 32 x i16> [[TMP6]], <vscale x 8 x i16> [[TMP7]], i64 24)
-// CPP-CHECK-NEXT:    store <vscale x 32 x i16> [[TMP8]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT:    [[TMP9:%.*]] = load { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT:    ret { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP9]]
+// CPP-CHECK-NEXT:    ret { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]]
 //
 svuint16x4_t test_svunpk_u16_x4(svuint8x2_t zn) __arm_streaming {
   return SVE_ACLE_FUNC(svunpk_u16,_u8_x4)(zn);
@@ -91,35 +47,13 @@ svuint16x4_t test_svunpk_u16_x4(svuint8x2_t zn) __arm_streaming {
 
 // CHECK-LABEL: @test_svunpk_s32_x4(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> }, align 16
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.sunpk.x4.nxv4i32(<vscale x 8 x i16> [[ZN_COERCE0:%.*]], <vscale x 8 x i16> [[ZN_COERCE1:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] =
extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svunpk_s32_x411svint16x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sunpk.x4.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint32x4_t test_svunpk_s32_x4(svint16x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svunpk_s32,_s16_x4)(zn); @@ -127,35 +61,13 @@ svint32x4_t test_svunpk_s32_x4(svint16x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svunpk_u32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uunpk.x4.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svunpk_u32_x412svuint16x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = 
alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uunpk.x4.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint32x4_t test_svunpk_u32_x4(svuint16x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svunpk_u32,_u16_x4)(zn); @@ -163,35 +75,13 @@ svuint32x4_t test_svunpk_u32_x4(svuint16x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svunpk_s64_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sunpk.x4.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svunpk_s64_x411svint32x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sunpk.x4.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr 
[[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint64x4_t test_svunpk_s64_x4(svint32x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svunpk_s64,_s32_x4)(zn); @@ -199,35 +89,13 @@ svint64x4_t test_svunpk_s64_x4(svint32x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svunpk_u64_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uunpk.x4.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svunpk_u64_x412svuint32x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uunpk.x4.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint64x4_t test_svunpk_u64_x4(svuint32x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svunpk_u64,_u32_x4)(zn); diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_vector_add.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_vector_add.c index c118a7192c6ca..de983bcf79309 100644 --- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_vector_add.c +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_vector_add.c @@ -25,27 +25,13 @@ // CHECK-LABEL: @test_svadd_vector_single2_s8( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.add.single.x2.nxv16i8( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 
0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z28test_svadd_vector_single2_s810svint8x2_tu10__SVInt8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.add.single.x2.nxv16i8( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint8x2_t test_svadd_vector_single2_s8(svint8x2_t zn, svint8_t zm) __arm_streaming { return SVE_ACLE_FUNC(svadd,_single_s8_x2,,,)(zn, zm); @@ -53,27 +39,13 @@ svint8x2_t test_svadd_vector_single2_s8(svint8x2_t zn, svint8_t zm) __arm_stream // CHECK-LABEL: @test_svadd_vector_single2_u8( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.add.single.x2.nxv16i8( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z28test_svadd_vector_single2_u811svuint8x2_tu11__SVUint8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.add.single.x2.nxv16i8( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint8x2_t test_svadd_vector_single2_u8(svuint8x2_t zn, svuint8_t zm) __arm_streaming { return SVE_ACLE_FUNC(svadd,_single_u8_x2,,,)(zn, zm); @@ -81,27 +53,13 @@ svuint8x2_t test_svadd_vector_single2_u8(svuint8x2_t zn, svuint8_t zm) __arm_str // CHECK-LABEL: @test_svadd_vector_single2_s16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = 
tail call { , } @llvm.aarch64.sve.add.single.x2.nxv8i16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z29test_svadd_vector_single2_s1611svint16x2_tu11__SVInt16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.add.single.x2.nxv8i16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint16x2_t test_svadd_vector_single2_s16(svint16x2_t zn, svint16_t zm) __arm_streaming { return SVE_ACLE_FUNC(svadd,_single_s16_x2,,,)(zn, zm); @@ -109,27 +67,13 @@ svint16x2_t test_svadd_vector_single2_s16(svint16x2_t zn, svint16_t zm) __arm_st // CHECK-LABEL: @test_svadd_vector_single2_u16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.add.single.x2.nxv8i16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z29test_svadd_vector_single2_u1612svuint16x2_tu12__SVUint16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.add.single.x2.nxv8i16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint16x2_t test_svadd_vector_single2_u16(svuint16x2_t zn, svuint16_t zm) __arm_streaming { return 
SVE_ACLE_FUNC(svadd,_single_u16_x2,,,)(zn, zm); @@ -137,27 +81,13 @@ svuint16x2_t test_svadd_vector_single2_u16(svuint16x2_t zn, svuint16_t zm) __arm // CHECK-LABEL: @test_svadd_vector_single2_s32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.add.single.x2.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z29test_svadd_vector_single2_s3211svint32x2_tu11__SVInt32_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.add.single.x2.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint32x2_t test_svadd_vector_single2_s32(svint32x2_t zn, svint32_t zm) __arm_streaming { return SVE_ACLE_FUNC(svadd,_single_s32_x2,,,)(zn, zm); @@ -165,27 +95,13 @@ svint32x2_t test_svadd_vector_single2_s32(svint32x2_t zn, svint32_t zm) __arm_st // CHECK-LABEL: @test_svadd_vector_single2_u32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.add.single.x2.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z29test_svadd_vector_single2_u3212svuint32x2_tu12__SVUint32_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.add.single.x2.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store 
[[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint32x2_t test_svadd_vector_single2_u32(svuint32x2_t zn, svuint32_t zm) __arm_streaming { return SVE_ACLE_FUNC(svadd,_single_u32_x2,,,)(zn, zm); @@ -193,27 +109,13 @@ svuint32x2_t test_svadd_vector_single2_u32(svuint32x2_t zn, svuint32_t zm) __arm // CHECK-LABEL: @test_svadd_vector_single2_s64( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.add.single.x2.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z29test_svadd_vector_single2_s6411svint64x2_tu11__SVInt64_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.add.single.x2.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint64x2_t test_svadd_vector_single2_s64(svint64x2_t zn, svint64_t zm) __arm_streaming { return SVE_ACLE_FUNC(svadd,_single_s64_x2,,,)(zn, zm); @@ -221,27 +123,13 @@ svint64x2_t test_svadd_vector_single2_s64(svint64x2_t zn, svint64_t zm) __arm_st // CHECK-LABEL: @test_svadd_vector_single2_u64( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.add.single.x2.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z29test_svadd_vector_single2_u6412svuint64x2_tu12__SVUint64_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.add.single.x2.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: 
[[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint64x2_t test_svadd_vector_single2_u64(svuint64x2_t zn, svuint64_t zm) __arm_streaming { return SVE_ACLE_FUNC(svadd,_single_u64_x2,,,)(zn, zm); @@ -252,35 +140,13 @@ svuint64x2_t test_svadd_vector_single2_u64(svuint64x2_t zn, svuint64_t zm) __arm // CHECK-LABEL: @test_svadd_vector_single4_s8( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.add.single.x4.nxv16i8( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z28test_svadd_vector_single4_s810svint8x4_tu10__SVInt8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.add.single.x4.nxv16i8( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint8x4_t test_svadd_vector_single4_s8(svint8x4_t zn, svint8_t zm) __arm_streaming { return SVE_ACLE_FUNC(svadd,_single_s8_x4,,,)(zn, zm); @@ -288,35 +154,13 @@ svint8x4_t test_svadd_vector_single4_s8(svint8x4_t zn, svint8_t zm) __arm_stream // CHECK-LABEL: @test_svadd_vector_single4_u8( // CHECK-NEXT: entry: -// 
CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.add.single.x4.nxv16i8( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z28test_svadd_vector_single4_u811svuint8x4_tu11__SVUint8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.add.single.x4.nxv16i8( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint8x4_t test_svadd_vector_single4_u8(svuint8x4_t zn, svuint8_t zm) __arm_streaming { return SVE_ACLE_FUNC(svadd,_single_u8_x4,,,)(zn, zm); @@ -324,35 +168,13 @@ svuint8x4_t test_svadd_vector_single4_u8(svuint8x4_t zn, svuint8_t zm) __arm_str // CHECK-LABEL: @test_svadd_vector_single4_s16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.add.single.x4.nxv8i16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue 
{ , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z29test_svadd_vector_single4_s1611svint16x4_tu11__SVInt16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.add.single.x4.nxv8i16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint16x4_t test_svadd_vector_single4_s16(svint16x4_t zn, svint16_t zm) __arm_streaming { return SVE_ACLE_FUNC(svadd,_single_s16_x4,,,)(zn, zm); @@ -360,35 +182,13 @@ svint16x4_t test_svadd_vector_single4_s16(svint16x4_t zn, svint16_t zm) __arm_st // CHECK-LABEL: @test_svadd_vector_single4_u16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.add.single.x4.nxv8i16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z29test_svadd_vector_single4_u1612svuint16x4_tu12__SVUint16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.add.single.x4.nxv8i16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// 
CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint16x4_t test_svadd_vector_single4_u16(svuint16x4_t zn, svuint16_t zm) __arm_streaming { return SVE_ACLE_FUNC(svadd,_single_u16_x4,,,)(zn, zm); @@ -396,35 +196,13 @@ svuint16x4_t test_svadd_vector_single4_u16(svuint16x4_t zn, svuint16_t zm) __arm // CHECK-LABEL: @test_svadd_vector_single4_s32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.add.single.x4.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z29test_svadd_vector_single4_s3211svint32x4_tu11__SVInt32_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.add.single.x4.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// 
CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint32x4_t test_svadd_vector_single4_s32(svint32x4_t zn, svint32_t zm) __arm_streaming { return SVE_ACLE_FUNC(svadd,_single_s32_x4,,,)(zn, zm); @@ -432,35 +210,13 @@ svint32x4_t test_svadd_vector_single4_s32(svint32x4_t zn, svint32_t zm) __arm_st // CHECK-LABEL: @test_svadd_vector_single4_u32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.add.single.x4.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z29test_svadd_vector_single4_u3212svuint32x4_tu12__SVUint32_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.add.single.x4.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint32x4_t test_svadd_vector_single4_u32(svuint32x4_t zn, svuint32_t zm) __arm_streaming { return SVE_ACLE_FUNC(svadd,_single_u32_x4,,,)(zn, zm); @@ -468,35 +224,13 @@ svuint32x4_t test_svadd_vector_single4_u32(svuint32x4_t zn, svuint32_t zm) __arm // CHECK-LABEL: @test_svadd_vector_single4_s64( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.add.single.x4.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call 
@llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z29test_svadd_vector_single4_s6411svint64x4_tu11__SVInt64_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.add.single.x4.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint64x4_t test_svadd_vector_single4_s64(svint64x4_t zn, svint64_t zm) __arm_streaming { return SVE_ACLE_FUNC(svadd,_single_s64_x4,,,)(zn, zm); @@ -504,35 +238,13 @@ svint64x4_t test_svadd_vector_single4_s64(svint64x4_t zn, svint64_t zm) __arm_st // CHECK-LABEL: @test_svadd_vector_single4_u64( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.add.single.x4.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // 
CPP-CHECK-LABEL: @_Z29test_svadd_vector_single4_u6412svuint64x4_tu12__SVUint64_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.add.single.x4.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint64x4_t test_svadd_vector_single4_u64(svuint64x4_t zn, svuint64_t zm) __arm_streaming { return SVE_ACLE_FUNC(svadd,_single_u64_x4,,,)(zn, zm); diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_vector_rshl.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_vector_rshl.c index 87160444e3c0d..af5a389c7f736 100644 --- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_vector_rshl.c +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_vector_rshl.c @@ -19,27 +19,13 @@ // CHECK-LABEL: @test_svrshl_single_s8_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.srshl.single.x2.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svrshl_single_s8_x210svint8x2_tu10__SVInt8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.srshl.single.x2.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint8x2_t test_svrshl_single_s8_x2(svint8x2_t zdn, svint8_t zm) __arm_streaming { 
return SVE_ACLE_FUNC(svrshl,_single_s8_x2,,,)(zdn, zm); @@ -47,27 +33,13 @@ svint8x2_t test_svrshl_single_s8_x2(svint8x2_t zdn, svint8_t zm) __arm_streaming // CHECK-LABEL: @test_svrshl_single_s16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.srshl.single.x2.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z25test_svrshl_single_s16_x211svint16x2_tu11__SVInt16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.srshl.single.x2.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint16x2_t test_svrshl_single_s16_x2(svint16x2_t zdn, svint16_t zm) __arm_streaming { return SVE_ACLE_FUNC(svrshl,_single_s16_x2,,,)(zdn, zm); @@ -75,27 +47,13 @@ svint16x2_t test_svrshl_single_s16_x2(svint16x2_t zdn, svint16_t zm) __arm_strea // CHECK-LABEL: @test_svrshl_single_s32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.srshl.single.x2.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z25test_svrshl_single_s32_x211svint32x2_tu11__SVInt32_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.srshl.single.x2.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: 
@@ -75,27 +47,13 @@ svint16x2_t test_svrshl_single_s16_x2(svint16x2_t zdn, svint16_t zm) __arm_strea
 // CHECK-LABEL: @test_svrshl_single_s32_x2(
[...]
+// CHECK-NEXT:    ret { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]]
 // CPP-CHECK-LABEL: @_Z25test_svrshl_single_s32_x211svint32x2_tu11__SVInt32_t(
[...]
+// CPP-CHECK-NEXT:    ret { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]]
 svint32x2_t test_svrshl_single_s32_x2(svint32x2_t zdn, svint32_t zm) __arm_streaming {
   return SVE_ACLE_FUNC(svrshl,_single_s32_x2,,,)(zdn, zm);
@@ -103,27 +61,13 @@ svint32x2_t test_svrshl_single_s32_x2(svint32x2_t zdn, svint32_t zm) __arm_strea
 // CHECK-LABEL: @test_svrshl_single_s64_x2(
[...]
+// CHECK-NEXT:    ret { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]]
 // CPP-CHECK-LABEL: @_Z25test_svrshl_single_s64_x211svint64x2_tu11__SVInt64_t(
[...]
+// CPP-CHECK-NEXT:    ret { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]]
 svint64x2_t test_svrshl_single_s64_x2(svint64x2_t zdn, svint64_t zm) __arm_streaming {
   return SVE_ACLE_FUNC(svrshl,_single_s64_x2,,,)(zdn, zm);
@@ -131,27 +75,13 @@ svint64x2_t test_svrshl_single_s64_x2(svint64x2_t zdn, svint64_t zm) __arm_strea
 // CHECK-LABEL: @test_svrshl_single_u8_x2(
[...]
+// CHECK-NEXT:    ret { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]]
 // CPP-CHECK-LABEL: @_Z24test_svrshl_single_u8_x211svuint8x2_tu11__SVUint8_t(
[...]
+// CPP-CHECK-NEXT:    ret { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]]
 svuint8x2_t test_svrshl_single_u8_x2(svuint8x2_t zdn, svuint8_t zm) __arm_streaming {
   return SVE_ACLE_FUNC(svrshl,_single_u8_x2,,,)(zdn, zm);
@@ -159,27 +89,13 @@ svuint8x2_t test_svrshl_single_u8_x2(svuint8x2_t zdn, svuint8_t zm) __arm_stream
 // CHECK-LABEL: @test_svrshl_single_u16_x2(
[...]
+// CHECK-NEXT:    ret { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]]
 // CPP-CHECK-LABEL: @_Z25test_svrshl_single_u16_x212svuint16x2_tu12__SVUint16_t(
[...]
+// CPP-CHECK-NEXT:    ret { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]]
 svuint16x2_t test_svrshl_single_u16_x2(svuint16x2_t zdn, svuint16_t zm) __arm_streaming {
   return SVE_ACLE_FUNC(svrshl,_single_u16_x2,,,)(zdn, zm);
@@ -187,27 +103,13 @@ svuint16x2_t test_svrshl_single_u16_x2(svuint16x2_t zdn, svuint16_t zm) __arm_st
 // CHECK-LABEL: @test_svrshl_single_u32_x2(
[...]
+// CHECK-NEXT:    ret { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]]
 // CPP-CHECK-LABEL: @_Z25test_svrshl_single_u32_x212svuint32x2_tu12__SVUint32_t(
[...]
+// CPP-CHECK-NEXT:    ret { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]]
 svuint32x2_t test_svrshl_single_u32_x2(svuint32x2_t zdn, svuint32_t zm) __arm_streaming {
   return SVE_ACLE_FUNC(svrshl,_single_u32_x2,,,)(zdn, zm);
@@ -215,27 +117,13 @@ svuint32x2_t test_svrshl_single_u32_x2(svuint32x2_t zdn, svuint32_t zm) __arm_st
 // CHECK-LABEL: @test_svrshl_single_u64_x2(
[...]
+// CHECK-NEXT:    ret { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]]
 // CPP-CHECK-LABEL: @_Z25test_svrshl_single_u64_x212svuint64x2_tu12__SVUint64_t(
[...]
+// CPP-CHECK-NEXT:    ret { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]]
 svuint64x2_t test_svrshl_single_u64_x2(svuint64x2_t zdn, svuint64_t zm) __arm_streaming {
   return SVE_ACLE_FUNC(svrshl,_single_u64_x2,,,)(zdn, zm);
@@ -245,35 +133,13 @@ svuint64x2_t test_svrshl_single_u64_x2(svuint64x2_t zdn, svuint64_t zm) __arm_st
 // CHECK-LABEL: @test_svrshl_single_s8_x4(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }, align 16
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.srshl.single.x4.nxv16i8(<vscale x 16 x i8> [[ZDN_COERCE0:%.*]], <vscale x 16 x i8> [[ZDN_COERCE1:%.*]], <vscale x 16 x i8> [[ZDN_COERCE2:%.*]], <vscale x 16 x i8> [[ZDN_COERCE3:%.*]], <vscale x 16 x i8> [[ZM:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 0
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> poison, <vscale x 16 x i8> [[TMP1]], i64 0)
-// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 1
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]], i64 16)
-// CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 2
-// CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> [[TMP4]], <vscale x 16 x i8> [[TMP5]], i64 32)
-// CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 3
-// CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> [[TMP6]], <vscale x 16 x i8> [[TMP7]], i64 48)
-// CHECK-NEXT:    store <vscale x 64 x i8> [[TMP8]], ptr [[RETVAL]], align 16
-// CHECK-NEXT:    [[TMP9:%.*]] = load { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }, ptr [[RETVAL]], align 16
-// CHECK-NEXT:    ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP9]]
+// CHECK-NEXT:    ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z24test_svrshl_single_s8_x410svint8x4_tu10__SVInt8_t(
[...]
+// CPP-CHECK-NEXT:    ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]]
 //
 svint8x4_t test_svrshl_single_s8_x4(svint8x4_t zdn, svint8_t zm) __arm_streaming {
   return SVE_ACLE_FUNC(svrshl,_single_s8_x4,,,)(zdn, zm);
@@ -281,35 +147,13 @@ svint8x4_t test_svrshl_single_s8_x4(svint8x4_t zdn, svint8_t zm) __arm_streaming
 // CHECK-LABEL: @test_svrshl_single_s16_x4(
[...]
+// CHECK-NEXT:    ret { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]]
 // CPP-CHECK-LABEL: @_Z25test_svrshl_single_s16_x411svint16x4_tu11__SVInt16_t(
[...]
+// CPP-CHECK-NEXT:    ret { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]]
 svint16x4_t test_svrshl_single_s16_x4(svint16x4_t zdn, svint16_t zm) __arm_streaming {
   return SVE_ACLE_FUNC(svrshl,_single_s16_x4,,,)(zdn, zm);
@@ -317,35 +161,13 @@ svint16x4_t test_svrshl_single_s16_x4(svint16x4_t zdn, svint16_t zm) __arm_strea
 // CHECK-LABEL: @test_svrshl_single_s32_x4(
[...]
+// CHECK-NEXT:    ret { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]]
 // CPP-CHECK-LABEL: @_Z25test_svrshl_single_s32_x411svint32x4_tu11__SVInt32_t(
[...]
+// CPP-CHECK-NEXT:    ret { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]]
 svint32x4_t test_svrshl_single_s32_x4(svint32x4_t zdn, svint32_t zm) __arm_streaming {
   return SVE_ACLE_FUNC(svrshl,_single_s32_x4,,,)(zdn, zm);
@@ -353,35 +175,13 @@ svint32x4_t test_svrshl_single_s32_x4(svint32x4_t zdn, svint32_t zm) __arm_strea
 // CHECK-LABEL: @test_svrshl_single_s64_x4(
[...]
+// CHECK-NEXT:    ret { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]]
 // CPP-CHECK-LABEL: @_Z25test_svrshl_single_s64_x411svint64x4_tu11__SVInt64_t(
[...]
+// CPP-CHECK-NEXT:    ret { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]]
 svint64x4_t test_svrshl_single_s64_x4(svint64x4_t zdn, svint64_t zm) __arm_streaming {
   return SVE_ACLE_FUNC(svrshl,_single_s64_x4,,,)(zdn, zm);
@@ -389,35 +189,13 @@ svint64x4_t test_svrshl_single_s64_x4(svint64x4_t zdn, svint64_t zm) __arm_strea
 // CHECK-LABEL: @test_svrshl_single_u8_x4(
[...]
+// CHECK-NEXT:    ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]]
 // CPP-CHECK-LABEL: @_Z24test_svrshl_single_u8_x411svuint8x4_tu11__SVUint8_t(
[...]
+// CPP-CHECK-NEXT:    ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]]
 svuint8x4_t test_svrshl_single_u8_x4(svuint8x4_t zdn, svuint8_t zm) __arm_streaming {
   return SVE_ACLE_FUNC(svrshl,_single_u8_x4,,,)(zdn, zm);
@@ -425,35 +203,13 @@ svuint8x4_t test_svrshl_single_u8_x4(svuint8x4_t zdn, svuint8_t zm) __arm_stream
 // CHECK-LABEL: @test_svrshl_single_u16_x4(
[...]
+// CHECK-NEXT:    ret { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]]
 // CPP-CHECK-LABEL: @_Z25test_svrshl_single_u16_x412svuint16x4_tu12__SVUint16_t(
[...]
+// CPP-CHECK-NEXT:    ret { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]]
 svuint16x4_t test_svrshl_single_u16_x4(svuint16x4_t zdn, svuint16_t zm) __arm_streaming {
   return SVE_ACLE_FUNC(svrshl,_single_u16_x4,,,)(zdn, zm);
@@ -461,35 +217,13 @@ svuint16x4_t test_svrshl_single_u16_x4(svuint16x4_t zdn, svuint16_t zm) __arm_st
 // CHECK-LABEL: @test_svrshl_single_u32_x4(
[...]
+// CHECK-NEXT:    ret { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]]
 // CPP-CHECK-LABEL: @_Z25test_svrshl_single_u32_x412svuint32x4_tu12__SVUint32_t(
[...]
+// CPP-CHECK-NEXT:    ret { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]]
 svuint32x4_t test_svrshl_single_u32_x4(svuint32x4_t zdn, svuint32_t zm) __arm_streaming {
   return SVE_ACLE_FUNC(svrshl,_single_u32_x4,,,)(zdn, zm);
@@ -497,35 +231,13 @@ svuint32x4_t test_svrshl_single_u32_x4(svuint32x4_t zdn, svuint32_t zm) __arm_st
 // CHECK-LABEL: @test_svrshl_single_u64_x4(
[...]
+// CHECK-NEXT:    ret { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]]
 // CPP-CHECK-LABEL: @_Z25test_svrshl_single_u64_x412svuint64x4_tu12__SVUint64_t(
[...]
+// CPP-CHECK-NEXT:    ret { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]]
 svuint64x4_t test_svrshl_single_u64_x4(svuint64x4_t zdn, svuint64_t zm) __arm_streaming {
   return SVE_ACLE_FUNC(svrshl,_single_u64_x4,,,)(zdn, zm);
@@ -535,27 +247,13 @@ svuint64x4_t test_svrshl_single_u64_x4(svuint64x4_t zdn, svuint64_t zm) __arm_st
 // CHECK-LABEL: @test_svrshl_multi_s8_x2(
[...]
+// CHECK-NEXT:    ret { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]]
 // CPP-CHECK-LABEL: @_Z23test_svrshl_multi_s8_x210svint8x2_tS_(
[...]
+// CPP-CHECK-NEXT:    ret { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]]
 svint8x2_t test_svrshl_multi_s8_x2(svint8x2_t zdn, svint8x2_t zm) __arm_streaming {
   return SVE_ACLE_FUNC(svrshl,_s8_x2,,,)(zdn, zm);
@@ -563,27 +261,13 @@ svint8x2_t test_svrshl_multi_s8_x2(svint8x2_t zdn, svint8x2_t zm) __arm_streamin
 // CHECK-LABEL: @test_svrshl_multi_s16_x2(
[...]
+// CHECK-NEXT:    ret { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]]
 // CPP-CHECK-LABEL: @_Z24test_svrshl_multi_s16_x211svint16x2_tS_(
[...]
+// CPP-CHECK-NEXT:    ret { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]]
 svint16x2_t test_svrshl_multi_s16_x2(svint16x2_t zdn, svint16x2_t zm) __arm_streaming {
   return SVE_ACLE_FUNC(svrshl,_s16_x2,,,)(zdn, zm);
@@ -591,27 +275,13 @@ svint16x2_t test_svrshl_multi_s16_x2(svint16x2_t zdn, svint16x2_t zm) __arm_stre
 // CHECK-LABEL: @test_svrshl_multi_s32_x2(
[...]
+// CHECK-NEXT:    ret { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]]
 // CPP-CHECK-LABEL: @_Z24test_svrshl_multi_s32_x211svint32x2_tS_(
[...]
+// CPP-CHECK-NEXT:    ret { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]]
 svint32x2_t test_svrshl_multi_s32_x2(svint32x2_t zdn, svint32x2_t zm) __arm_streaming {
   return SVE_ACLE_FUNC(svrshl,_s32_x2,,,)(zdn, zm);
@@ -619,27 +289,13 @@ svint32x2_t test_svrshl_multi_s32_x2(svint32x2_t zdn, svint32x2_t zm) __arm_stre
 // CHECK-LABEL: @test_svrshl_multi_s64_x2(
[...]
+// CHECK-NEXT:    ret { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]]
 // CPP-CHECK-LABEL: @_Z24test_svrshl_multi_s64_x211svint64x2_tS_(
[...]
+// CPP-CHECK-NEXT:    ret { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]]
 svint64x2_t test_svrshl_multi_s64_x2(svint64x2_t zdn, svint64x2_t zm) __arm_streaming {
   return SVE_ACLE_FUNC(svrshl,_s64_x2,,,)(zdn, zm);
@@ -647,27 +303,13 @@ svint64x2_t test_svrshl_multi_s64_x2(svint64x2_t zdn, svint64x2_t zm) __arm_stre
 // CHECK-LABEL: @test_svrshl_multi_u8_x2(
[...]
+// CHECK-NEXT:    ret { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]]
 // CPP-CHECK-LABEL: @_Z23test_svrshl_multi_u8_x211svuint8x2_tS_(
[...]
+// CPP-CHECK-NEXT:    ret { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]]
 svuint8x2_t test_svrshl_multi_u8_x2(svuint8x2_t zdn, svuint8x2_t zm) __arm_streaming {
   return SVE_ACLE_FUNC(svrshl,_u8_x2,,,)(zdn, zm);
@@ -675,27 +317,13 @@ svuint8x2_t test_svrshl_multi_u8_x2(svuint8x2_t zdn, svuint8x2_t zm) __arm_strea
 // CHECK-LABEL: @test_svrshl_multi_u16_x2(
[...]
+// CHECK-NEXT:    ret { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]]
 // CPP-CHECK-LABEL: @_Z24test_svrshl_multi_u16_x212svuint16x2_tS_(
[...]
+// CPP-CHECK-NEXT:    ret { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]]
 svuint16x2_t test_svrshl_multi_u16_x2(svuint16x2_t zdn, svuint16x2_t zm) __arm_streaming {
   return SVE_ACLE_FUNC(svrshl,_u16_x2,,,)(zdn, zm);
@@ -703,27 +331,13 @@ svuint16x2_t test_svrshl_multi_u16_x2(svuint16x2_t zdn, svuint16x2_t zm) __arm_s
 // CHECK-LABEL: @test_svrshl_multi_u32_x2(
[...]
+// CHECK-NEXT:    ret { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]]
 // CPP-CHECK-LABEL: @_Z24test_svrshl_multi_u32_x212svuint32x2_tS_(
[...]
+// CPP-CHECK-NEXT:    ret { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]]
 svuint32x2_t test_svrshl_multi_u32_x2(svuint32x2_t zdn, svuint32x2_t zm) __arm_streaming {
   return SVE_ACLE_FUNC(svrshl,_u32_x2,,,)(zdn, zm);
@@ -731,27 +345,13 @@ svuint32x2_t test_svrshl_multi_u32_x2(svuint32x2_t zdn, svuint32x2_t zm) __arm_s
 // CHECK-LABEL: @test_svrshl_multi_u64_x2(
[...]
+// CHECK-NEXT:    ret { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]]
 // CPP-CHECK-LABEL: @_Z24test_svrshl_multi_u64_x212svuint64x2_tS_(
[...]
+// CPP-CHECK-NEXT:    ret { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]]
 svuint64x2_t test_svrshl_multi_u64_x2(svuint64x2_t zdn, svuint64x2_t zm) __arm_streaming {
   return SVE_ACLE_FUNC(svrshl,_u64_x2,,,)(zdn, zm);
@@ -761,35 +361,13 @@ svuint64x2_t test_svrshl_multi_u64_x2(svuint64x2_t zdn, svuint64x2_t zm) __arm_s
 // CHECK-LABEL: @test_svrshl_multi_s8_x4(
[...]
+// CHECK-NEXT:    ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]]
 // CPP-CHECK-LABEL: @_Z23test_svrshl_multi_s8_x410svint8x4_tS_(
[...]
+// CPP-CHECK-NEXT:    ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]]
 svint8x4_t test_svrshl_multi_s8_x4(svint8x4_t zdn, svint8x4_t zm) __arm_streaming {
   return SVE_ACLE_FUNC(svrshl,_s8_x4,,,)(zdn, zm);
@@ -797,35 +375,13 @@ svint8x4_t test_svrshl_multi_s8_x4(svint8x4_t zdn, svint8x4_t zm) __arm_streamin
 // CHECK-LABEL: @test_svrshl_multi_s16_x4(
[...]
+// CHECK-NEXT:    ret { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]]
 // CPP-CHECK-LABEL: @_Z24test_svrshl_multi_s16_x411svint16x4_tS_(
[...]
+// CPP-CHECK-NEXT:    ret { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]]
 svint16x4_t test_svrshl_multi_s16_x4(svint16x4_t zdn, svint16x4_t zm) __arm_streaming {
   return SVE_ACLE_FUNC(svrshl,_s16_x4,,,)(zdn, zm);
@@ -833,35 +389,13 @@ svint16x4_t test_svrshl_multi_s16_x4(svint16x4_t zdn, svint16x4_t zm) __arm_stre
 // CHECK-LABEL: @test_svrshl_multi_s32_x4(
[...]
+// CHECK-NEXT:    ret { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]]
 // CPP-CHECK-LABEL: @_Z24test_svrshl_multi_s32_x411svint32x4_tS_(
[...]
+// CPP-CHECK-NEXT:    ret { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]]
 svint32x4_t test_svrshl_multi_s32_x4(svint32x4_t zdn, svint32x4_t zm) __arm_streaming {
   return SVE_ACLE_FUNC(svrshl,_s32_x4,,,)(zdn, zm);
@@ -869,35 +403,13 @@ svint32x4_t test_svrshl_multi_s32_x4(svint32x4_t zdn, svint32x4_t zm) __arm_stre
 // CHECK-LABEL: @test_svrshl_multi_s64_x4(
[...]
+// CHECK-NEXT:    ret { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]]
 // CPP-CHECK-LABEL: @_Z24test_svrshl_multi_s64_x411svint64x4_tS_(
[...]
+// CPP-CHECK-NEXT:    ret { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]]
 svint64x4_t test_svrshl_multi_s64_x4(svint64x4_t zdn, svint64x4_t zm) __arm_streaming {
   return SVE_ACLE_FUNC(svrshl,_s64_x4,,,)(zdn, zm);
@@ -905,35 +417,13 @@ svint64x4_t test_svrshl_multi_s64_x4(svint64x4_t zdn, svint64x4_t zm) __arm_stre
 // CHECK-LABEL: @test_svrshl_multi_u8_x4(
[...]
+// CHECK-NEXT:    ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]]
 // CPP-CHECK-LABEL: @_Z23test_svrshl_multi_u8_x411svuint8x4_tS_(
[...]
+// CPP-CHECK-NEXT:    ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]]
 svuint8x4_t test_svrshl_multi_u8_x4(svuint8x4_t zdn, svuint8x4_t zm) __arm_streaming {
   return SVE_ACLE_FUNC(svrshl,_u8_x4,,,)(zdn, zm);
@@ -941,35 +431,13 @@ svuint8x4_t test_svrshl_multi_u8_x4(svuint8x4_t zdn, svuint8x4_t zm) __arm_strea
 // CHECK-LABEL: @test_svrshl_multi_u16_x4(
[...]
+// CHECK-NEXT:    ret { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]]
 // CPP-CHECK-LABEL: @_Z24test_svrshl_multi_u16_x412svuint16x4_tS_(
[...]
+// CPP-CHECK-NEXT:    ret { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]]
 svuint16x4_t test_svrshl_multi_u16_x4(svuint16x4_t zdn, svuint16x4_t zm) __arm_streaming {
   return SVE_ACLE_FUNC(svrshl,_u16_x4,,,)(zdn, zm);
@@ -977,35 +445,13 @@ svuint16x4_t test_svrshl_multi_u16_x4(svuint16x4_t zdn, svuint16x4_t zm) __arm_s
 // CHECK-LABEL: @test_svrshl_multi_u32_x4(
[...]
+// CHECK-NEXT:    ret { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]]
 // CPP-CHECK-LABEL: @_Z24test_svrshl_multi_u32_x412svuint32x4_tS_(
[...]
+// CPP-CHECK-NEXT:    ret { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]]
 svuint32x4_t test_svrshl_multi_u32_x4(svuint32x4_t zdn, svuint32x4_t zm) __arm_streaming {
   return SVE_ACLE_FUNC(svrshl,_u32_x4,,,)(zdn, zm);
[[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint32x4_t test_svrshl_multi_u32_x4(svuint32x4_t zdn, svuint32x4_t zm) __arm_streaming { return SVE_ACLE_FUNC(svrshl,_u32_x4,,,)(zdn, zm); @@ -1013,35 +459,13 @@ svuint32x4_t test_svrshl_multi_u32_x4(svuint32x4_t zdn, svuint32x4_t zm) __arm_s // CHECK-LABEL: @test_svrshl_multi_u64_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.urshl.x4.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svrshl_multi_u64_x412svuint64x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.urshl.x4.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint64x4_t test_svrshl_multi_u64_x4(svuint64x4_t zdn, svuint64x4_t zm) __arm_streaming { return SVE_ACLE_FUNC(svrshl,_u64_x4,,,)(zdn, zm); diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_vector_selx2.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_vector_selx2.c index a95f89faf7783..4047b2fbd1965 100644 --- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_vector_selx2.c +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_vector_selx2.c @@ -19,27 +19,13 @@ // CHECK-LABEL: @test_svsel_s8_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: 
[[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sel.x2.nxv16i8(target("aarch64.svcount") [[PN:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z16test_svsel_s8_x2u11__SVCount_t10svint8x2_tS0_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sel.x2.nxv16i8(target("aarch64.svcount") [[PN:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint8x2_t test_svsel_s8_x2(svcount_t pn, svint8x2_t zn, svint8x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svsel,_s8_x2)(pn, zn, zm); @@ -47,27 +33,13 @@ svint8x2_t test_svsel_s8_x2(svcount_t pn, svint8x2_t zn, svint8x2_t zm) __arm_st // CHECK-LABEL: @test_svsel_u8_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sel.x2.nxv16i8(target("aarch64.svcount") [[PN:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z16test_svsel_u8_x2u11__SVCount_t11svuint8x2_tS0_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sel.x2.nxv16i8(target("aarch64.svcount") [[PN:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// 
CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint8x2_t test_svsel_u8_x2(svcount_t pn, svuint8x2_t zn, svuint8x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svsel,_u8_x2)(pn, zn, zm); @@ -77,27 +49,13 @@ svuint8x2_t test_svsel_u8_x2(svcount_t pn, svuint8x2_t zn, svuint8x2_t zm) __arm // CHECK-LABEL: @test_svsel_s16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sel.x2.nxv8i16(target("aarch64.svcount") [[PN:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svsel_s16_x2u11__SVCount_t11svint16x2_tS0_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sel.x2.nxv8i16(target("aarch64.svcount") [[PN:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint16x2_t test_svsel_s16_x2(svcount_t pn, svint16x2_t zn, svint16x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svsel,_s16_x2)(pn, zn, zm); @@ -105,27 +63,13 @@ svint16x2_t test_svsel_s16_x2(svcount_t pn, svint16x2_t zn, svint16x2_t zm) __ar // CHECK-LABEL: @test_svsel_u16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sel.x2.nxv8i16(target("aarch64.svcount") [[PN:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svsel_u16_x2u11__SVCount_t12svuint16x2_tS0_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sel.x2.nxv8i16(target("aarch64.svcount") [[PN:%.*]], [[ZN_COERCE0:%.*]], 
[[ZN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint16x2_t test_svsel_u16_x2(svcount_t pn, svuint16x2_t zn, svuint16x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svsel,_u16_x2)(pn, zn, zm); @@ -133,27 +77,13 @@ svuint16x2_t test_svsel_u16_x2(svcount_t pn, svuint16x2_t zn, svuint16x2_t zm) _ // CHECK-LABEL: @test_svsel_f16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sel.x2.nxv8f16(target("aarch64.svcount") [[PN:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svsel_f16_x2u11__SVCount_t13svfloat16x2_tS0_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sel.x2.nxv8f16(target("aarch64.svcount") [[PN:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat16x2_t test_svsel_f16_x2(svcount_t pn, svfloat16x2_t zn, svfloat16x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svsel,_f16_x2)(pn, zn, zm); @@ -161,27 +91,13 @@ svfloat16x2_t test_svsel_f16_x2(svcount_t pn, svfloat16x2_t zn, svfloat16x2_t zm // CHECK-LABEL: @test_svsel_bf16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sel.x2.nxv8bf16(target("aarch64.svcount") [[PN:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// 
CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svsel_bf16_x2u11__SVCount_t14svbfloat16x2_tS0_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sel.x2.nxv8bf16(target("aarch64.svcount") [[PN:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svbfloat16x2_t test_svsel_bf16_x2(svcount_t pn, svbfloat16x2_t zn, svbfloat16x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svsel,_bf16_x2)(pn, zn, zm); @@ -191,27 +107,13 @@ svbfloat16x2_t test_svsel_bf16_x2(svcount_t pn, svbfloat16x2_t zn, svbfloat16x2_ // CHECK-LABEL: @test_svsel_s32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sel.x2.nxv4i32(target("aarch64.svcount") [[PN:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svsel_s32_x2u11__SVCount_t11svint32x2_tS0_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sel.x2.nxv4i32(target("aarch64.svcount") [[PN:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint32x2_t test_svsel_s32_x2(svcount_t pn, svint32x2_t zn, svint32x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svsel,_s32_x2)(pn, zn, zm); @@ -219,27 +121,13 @@ svint32x2_t test_svsel_s32_x2(svcount_t pn, svint32x2_t zn, svint32x2_t zm) __ar // CHECK-LABEL: @test_svsel_u32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } 
@llvm.aarch64.sve.sel.x2.nxv4i32(target("aarch64.svcount") [[PN:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svsel_u32_x2u11__SVCount_t12svuint32x2_tS0_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sel.x2.nxv4i32(target("aarch64.svcount") [[PN:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint32x2_t test_svsel_u32_x2(svcount_t pn, svuint32x2_t zn, svuint32x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svsel,_u32_x2)(pn, zn, zm); @@ -247,27 +135,13 @@ svuint32x2_t test_svsel_u32_x2(svcount_t pn, svuint32x2_t zn, svuint32x2_t zm) _ // CHECK-LABEL: @test_svsel_f32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sel.x2.nxv4f32(target("aarch64.svcount") [[PN:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svsel_f32_x2u11__SVCount_t13svfloat32x2_tS0_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sel.x2.nxv4f32(target("aarch64.svcount") [[PN:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// 
CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat32x2_t test_svsel_f32_x2(svcount_t pn, svfloat32x2_t zn, svfloat32x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svsel,_f32_x2)(pn, zn, zm); @@ -277,27 +151,13 @@ svfloat32x2_t test_svsel_f32_x2(svcount_t pn, svfloat32x2_t zn, svfloat32x2_t zm // CHECK-LABEL: @test_svsel_s64_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sel.x2.nxv2i64(target("aarch64.svcount") [[PN:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svsel_s64_x2u11__SVCount_t11svint64x2_tS0_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sel.x2.nxv2i64(target("aarch64.svcount") [[PN:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint64x2_t test_svsel_s64_x2(svcount_t pn, svint64x2_t zn, svint64x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svsel,_s64_x2)(pn, zn, zm); @@ -305,27 +165,13 @@ svint64x2_t test_svsel_s64_x2(svcount_t pn, svint64x2_t zn, svint64x2_t zm) __ar // CHECK-LABEL: @test_svsel_u64_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sel.x2.nxv2i64(target("aarch64.svcount") [[PN:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svsel_u64_x2u11__SVCount_t12svuint64x2_tS0_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sel.x2.nxv2i64(target("aarch64.svcount") [[PN:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// 
CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint64x2_t test_svsel_u64_x2(svcount_t pn, svuint64x2_t zn, svuint64x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svsel,_u64_x2)(pn, zn, zm); @@ -333,27 +179,13 @@ svuint64x2_t test_svsel_u64_x2(svcount_t pn, svuint64x2_t zn, svuint64x2_t zm) _ // CHECK-LABEL: @test_svsel_f64_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sel.x2.nxv2f64(target("aarch64.svcount") [[PN:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svsel_f64_x2u11__SVCount_t13svfloat64x2_tS0_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sel.x2.nxv2f64(target("aarch64.svcount") [[PN:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat64x2_t test_svsel_f64_x2(svcount_t pn, svfloat64x2_t zn, svfloat64x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svsel,_f64_x2)(pn, zn, zm); diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_vector_selx4.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_vector_selx4.c index 997b6acf96244..871d70943c9df 100644 --- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_vector_selx4.c +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_vector_selx4.c @@ -19,35 +19,13 @@ // CHECK-LABEL: @test_svsel_s8_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sel.x4.nxv16i8(target("aarch64.svcount") [[PN:%.*]], [[ZN1_COERCE0:%.*]], [[ZN1_COERCE1:%.*]], [[ZN1_COERCE2:%.*]], [[ZN1_COERCE3:%.*]], [[ZN2_COERCE0:%.*]], [[ZN2_COERCE1:%.*]], [[ZN2_COERCE2:%.*]], [[ZN2_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1
-// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16)
-// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2
-// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32)
-// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
-// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48)
-// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16
-// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
-// CHECK-NEXT: ret { , , , } [[TMP9]]
+// CHECK-NEXT: ret { , , , } [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z16test_svsel_s8_x4u11__SVCount_t10svint8x4_tS0_(
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16
 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sel.x4.nxv16i8(target("aarch64.svcount") [[PN:%.*]], [[ZN1_COERCE0:%.*]], [[ZN1_COERCE1:%.*]], [[ZN1_COERCE2:%.*]], [[ZN1_COERCE3:%.*]], [[ZN2_COERCE0:%.*]], [[ZN2_COERCE1:%.*]], [[ZN2_COERCE2:%.*]], [[ZN2_COERCE3:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16)
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2
-// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32)
-// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
-// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48)
-// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: ret { , , , } [[TMP9]]
+// CPP-CHECK-NEXT: ret { , , , } [[TMP0]]
 //
 svint8x4_t test_svsel_s8_x4(svcount_t pn, svint8x4_t zn1, svint8x4_t zn2) __arm_streaming {
   return SVE_ACLE_FUNC(svsel,_s8_x4)(pn, zn1, zn2);
@@ -55,35 +33,13 @@ svint8x4_t test_svsel_s8_x4(svcount_t pn, svint8x4_t zn1, svint8x4_t zn2) __arm_
 // CHECK-LABEL: @test_svsel_u8_x4(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16
 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sel.x4.nxv16i8(target("aarch64.svcount") [[PN:%.*]], [[ZN1_COERCE0:%.*]], [[ZN1_COERCE1:%.*]], [[ZN1_COERCE2:%.*]], [[ZN1_COERCE3:%.*]], [[ZN2_COERCE0:%.*]], [[ZN2_COERCE1:%.*]], [[ZN2_COERCE2:%.*]], [[ZN2_COERCE3:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1
-// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16)
-// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2
-// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32)
-// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
-// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48)
-// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16
-// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
-// CHECK-NEXT: ret { , , , } [[TMP9]]
+// CHECK-NEXT: ret { , , , } [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z16test_svsel_u8_x4u11__SVCount_t11svuint8x4_tS0_S0_S0_(
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16
 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sel.x4.nxv16i8(target("aarch64.svcount") [[PN:%.*]], [[ZN1_COERCE0:%.*]], [[ZN1_COERCE1:%.*]], [[ZN1_COERCE2:%.*]], [[ZN1_COERCE3:%.*]], [[ZN2_COERCE0:%.*]], [[ZN2_COERCE1:%.*]], [[ZN2_COERCE2:%.*]], [[ZN2_COERCE3:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16)
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2
-// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32)
-// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
-// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48)
-// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: ret { , , , } [[TMP9]]
+// CPP-CHECK-NEXT: ret { , , , } [[TMP0]]
 //
 svuint8x4_t test_svsel_u8_x4(svcount_t pn, svuint8x4_t zn1, svuint8x4_t zn2, svuint8x4_t zn3, svuint8x4_t zn4) __arm_streaming {
   return SVE_ACLE_FUNC(svsel,_u8_x4)(pn, zn1, zn2);
@@ -93,35 +49,13 @@ svuint8x4_t test_svsel_u8_x4(svcount_t pn, svuint8x4_t zn1, svuint8x4_t zn2, svu
 // CHECK-LABEL: @test_svsel_s16_x4(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16
 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sel.x4.nxv8i16(target("aarch64.svcount") [[PN:%.*]], [[ZN1_COERCE0:%.*]], [[ZN1_COERCE1:%.*]], [[ZN1_COERCE2:%.*]], [[ZN1_COERCE3:%.*]], [[ZN2_COERCE0:%.*]], [[ZN2_COERCE1:%.*]], [[ZN2_COERCE2:%.*]], [[ZN2_COERCE3:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1
-// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8)
-// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2
-// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16)
-// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
-// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24)
-// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16
-// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
-// CHECK-NEXT: ret { , , , } [[TMP9]]
+// CHECK-NEXT: ret { , , , } [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z17test_svsel_s16_x4u11__SVCount_t11svint16x4_tS0_S0_S0_(
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16
 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sel.x4.nxv8i16(target("aarch64.svcount") [[PN:%.*]], [[ZN1_COERCE0:%.*]], [[ZN1_COERCE1:%.*]], [[ZN1_COERCE2:%.*]], [[ZN1_COERCE3:%.*]], [[ZN2_COERCE0:%.*]], [[ZN2_COERCE1:%.*]], [[ZN2_COERCE2:%.*]], [[ZN2_COERCE3:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8)
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2
-// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16)
-// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
-// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24)
-// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: ret { , , , } [[TMP9]]
+// CPP-CHECK-NEXT: ret { , , , } [[TMP0]]
 //
 svint16x4_t test_svsel_s16_x4(svcount_t pn, svint16x4_t zn1, svint16x4_t zn2, svint16x4_t zn3, svint16x4_t zn4) __arm_streaming {
   return SVE_ACLE_FUNC(svsel,_s16_x4)(pn, zn1, zn2);
@@ -129,35 +63,13 @@ svint16x4_t test_svsel_s16_x4(svcount_t pn, svint16x4_t zn1, svint16x4_t zn2, sv
 // CHECK-LABEL: @test_svsel_u16_x4(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16
 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sel.x4.nxv8i16(target("aarch64.svcount") [[PN:%.*]], [[ZN1_COERCE0:%.*]], [[ZN1_COERCE1:%.*]], [[ZN1_COERCE2:%.*]], [[ZN1_COERCE3:%.*]], [[ZN2_COERCE0:%.*]], [[ZN2_COERCE1:%.*]], [[ZN2_COERCE2:%.*]], [[ZN2_COERCE3:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1
-// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8)
-// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2
-// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16)
-// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
-// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24)
-// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16
-// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
-// CHECK-NEXT: ret { , , , } [[TMP9]]
+// CHECK-NEXT: ret { , , , } [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z17test_svsel_u16_x4u11__SVCount_t12svuint16x4_tS0_S0_S0_(
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16
 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sel.x4.nxv8i16(target("aarch64.svcount") [[PN:%.*]], [[ZN1_COERCE0:%.*]], [[ZN1_COERCE1:%.*]], [[ZN1_COERCE2:%.*]], [[ZN1_COERCE3:%.*]], [[ZN2_COERCE0:%.*]], [[ZN2_COERCE1:%.*]], [[ZN2_COERCE2:%.*]], [[ZN2_COERCE3:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8)
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2
-// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16)
-// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
-// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24)
-// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: ret { , , , } [[TMP9]]
+// CPP-CHECK-NEXT: ret { , , , } [[TMP0]]
 //
 svuint16x4_t test_svsel_u16_x4(svcount_t pn, svuint16x4_t zn1, svuint16x4_t zn2, svuint16x4_t zn3, svuint16x4_t zn4) __arm_streaming {
   return SVE_ACLE_FUNC(svsel,_u16_x4)(pn, zn1, zn2);
@@ -165,35 +77,13 @@ svuint16x4_t test_svsel_u16_x4(svcount_t pn, svuint16x4_t zn1, svuint16x4_t zn2,
 // CHECK-LABEL: @test_svsel_f16_x4(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16
 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sel.x4.nxv8f16(target("aarch64.svcount") [[PN:%.*]], [[ZN1_COERCE0:%.*]], [[ZN1_COERCE1:%.*]], [[ZN1_COERCE2:%.*]], [[ZN1_COERCE3:%.*]], [[ZN2_COERCE0:%.*]], [[ZN2_COERCE1:%.*]], [[ZN2_COERCE2:%.*]], [[ZN2_COERCE3:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1
-// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8)
-// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2
-// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16)
-// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
-// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24)
-// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16
-// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
-// CHECK-NEXT: ret { , , , } [[TMP9]]
+// CHECK-NEXT: ret { , , , } [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z17test_svsel_f16_x4u11__SVCount_t13svfloat16x4_tS0_S0_S0_(
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16
 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sel.x4.nxv8f16(target("aarch64.svcount") [[PN:%.*]], [[ZN1_COERCE0:%.*]], [[ZN1_COERCE1:%.*]], [[ZN1_COERCE2:%.*]], [[ZN1_COERCE3:%.*]], [[ZN2_COERCE0:%.*]], [[ZN2_COERCE1:%.*]], [[ZN2_COERCE2:%.*]], [[ZN2_COERCE3:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8)
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2
-// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16)
-// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
-// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24)
-// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: ret { , , , } [[TMP9]]
+// CPP-CHECK-NEXT: ret { , , , } [[TMP0]]
 //
 svfloat16x4_t test_svsel_f16_x4(svcount_t pn, svfloat16x4_t zn1, svfloat16x4_t zn2, svfloat16x4_t zn3, svfloat16x4_t zn4) __arm_streaming {
   return SVE_ACLE_FUNC(svsel,_f16_x4)(pn, zn1, zn2);
@@ -201,35 +91,13 @@ svfloat16x4_t test_svsel_f16_x4(svcount_t pn, svfloat16x4_t z
 // CHECK-LABEL: @test_svsel_bf16_x4(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16
 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sel.x4.nxv8bf16(target("aarch64.svcount") [[PN:%.*]], [[ZN1_COERCE0:%.*]], [[ZN1_COERCE1:%.*]], [[ZN1_COERCE2:%.*]], [[ZN1_COERCE3:%.*]], [[ZN2_COERCE0:%.*]], [[ZN2_COERCE1:%.*]], [[ZN2_COERCE2:%.*]], [[ZN2_COERCE3:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP1]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1
-// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8)
-// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2
-// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 16)
-// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
-// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 24)
-// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16
-// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
-// CHECK-NEXT: ret { , , , } [[TMP9]]
+// CHECK-NEXT: ret { , , , } [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z18test_svsel_bf16_x4u11__SVCount_t14svbfloat16x4_tS0_S0_S0_(
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16
 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sel.x4.nxv8bf16(target("aarch64.svcount") [[PN:%.*]], [[ZN1_COERCE0:%.*]], [[ZN1_COERCE1:%.*]], [[ZN1_COERCE2:%.*]], [[ZN1_COERCE3:%.*]], [[ZN2_COERCE0:%.*]], [[ZN2_COERCE1:%.*]], [[ZN2_COERCE2:%.*]], [[ZN2_COERCE3:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP1]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8)
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2
-// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 16)
-// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
-// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 24)
-// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: ret { , , , } [[TMP9]]
+// CPP-CHECK-NEXT: ret { , , , } [[TMP0]]
 //
 svbfloat16x4_t test_svsel_bf16_x4(svcount_t pn, svbfloat16x4_t zn1, svbfloat16x4_t zn2, svbfloat16x4_t zn3, svbfloat16x4_t zn4) __arm_streaming {
   return SVE_ACLE_FUNC(svsel,_bf16_x4)(pn, zn1, zn2);
@@ -239,35 +107,13 @@ svbfloat16x4_t test_svsel_bf16_x4(svcount_t pn, svbfloat16x4_t zn1, svbfloat16x4
 // CHECK-LABEL: @test_svsel_s32_x4(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16
 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sel.x4.nxv4i32(target("aarch64.svcount") [[PN:%.*]], [[ZN1_COERCE0:%.*]], [[ZN1_COERCE1:%.*]], [[ZN1_COERCE2:%.*]], [[ZN1_COERCE3:%.*]], [[ZN2_COERCE0:%.*]], [[ZN2_COERCE1:%.*]], [[ZN2_COERCE2:%.*]], [[ZN2_COERCE3:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1
-// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4)
-// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2
-// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8)
-// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
-// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12)
-// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16
-// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
-// CHECK-NEXT: ret { , , , } [[TMP9]]
+// CHECK-NEXT: ret { , , , } [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z17test_svsel_s32_x4u11__SVCount_t11svint32x4_tS0_S0_S0_(
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16
 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sel.x4.nxv4i32(target("aarch64.svcount") [[PN:%.*]], [[ZN1_COERCE0:%.*]], [[ZN1_COERCE1:%.*]], [[ZN1_COERCE2:%.*]], [[ZN1_COERCE3:%.*]], [[ZN2_COERCE0:%.*]], [[ZN2_COERCE1:%.*]], [[ZN2_COERCE2:%.*]], [[ZN2_COERCE3:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4)
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2
-// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8)
-// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
-// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12)
-// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: ret { , , , } [[TMP9]]
+// CPP-CHECK-NEXT: ret { , , , } [[TMP0]]
 //
 svint32x4_t test_svsel_s32_x4(svcount_t pn, svint32x4_t zn1, svint32x4_t zn2, svint32x4_t zn3, svint32x4_t zn4) __arm_streaming {
   return SVE_ACLE_FUNC(svsel,_s32_x4)(pn, zn1, zn2);
@@ -275,35 +121,13 @@ svint32x4_t test_svsel_s32_x4(svcount_t pn, svint32x4_t zn1, svint32x4_t zn2, sv
 // CHECK-LABEL: @test_svsel_u32_x4(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16
 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sel.x4.nxv4i32(target("aarch64.svcount") [[PN:%.*]], [[ZN1_COERCE0:%.*]], [[ZN1_COERCE1:%.*]], [[ZN1_COERCE2:%.*]], [[ZN1_COERCE3:%.*]], [[ZN2_COERCE0:%.*]], [[ZN2_COERCE1:%.*]], [[ZN2_COERCE2:%.*]], [[ZN2_COERCE3:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1
-// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4)
-// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2
-// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8)
-// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
-// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12)
-// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16
-// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
-// CHECK-NEXT: ret { , , , } [[TMP9]]
+// CHECK-NEXT: ret { , , , } [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z17test_svsel_u32_x4u11__SVCount_t12svuint32x4_tS0_S0_S0_(
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16
 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sel.x4.nxv4i32(target("aarch64.svcount") [[PN:%.*]], [[ZN1_COERCE0:%.*]], [[ZN1_COERCE1:%.*]], [[ZN1_COERCE2:%.*]], [[ZN1_COERCE3:%.*]], [[ZN2_COERCE0:%.*]], [[ZN2_COERCE1:%.*]], [[ZN2_COERCE2:%.*]], [[ZN2_COERCE3:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4)
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2
-// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8)
-// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
-// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12)
-// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: ret { , , , } [[TMP9]]
+// CPP-CHECK-NEXT: ret { , , , } [[TMP0]]
 //
 svuint32x4_t test_svsel_u32_x4(svcount_t pn, svuint32x4_t zn1, svuint32x4_t zn2, svuint32x4_t zn3, svuint32x4_t zn4) __arm_streaming {
   return SVE_ACLE_FUNC(svsel,_u32_x4)(pn, zn1, zn2);
@@ -311,35 +135,13 @@ svuint32x4_t test_svsel_u32_x4(svcount_t pn, svuint32x4_t zn1, svuint32x4_t zn2,
 // CHECK-LABEL: @test_svsel_f32_x4(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16
 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sel.x4.nxv4f32(target("aarch64.svcount") [[PN:%.*]], [[ZN1_COERCE0:%.*]], [[ZN1_COERCE1:%.*]], [[ZN1_COERCE2:%.*]], [[ZN1_COERCE3:%.*]], [[ZN2_COERCE0:%.*]], [[ZN2_COERCE1:%.*]], [[ZN2_COERCE2:%.*]], [[ZN2_COERCE3:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1
-// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4)
-// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2
-// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8)
-// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
-// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12)
-// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16
-// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
-// CHECK-NEXT: ret { , , , } [[TMP9]]
+// CHECK-NEXT: ret { , , , } [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z17test_svsel_f32_x4u11__SVCount_t13svfloat32x4_tS0_S0_S0_(
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16
 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sel.x4.nxv4f32(target("aarch64.svcount") [[PN:%.*]], [[ZN1_COERCE0:%.*]], [[ZN1_COERCE1:%.*]], [[ZN1_COERCE2:%.*]], [[ZN1_COERCE3:%.*]], [[ZN2_COERCE0:%.*]], [[ZN2_COERCE1:%.*]], [[ZN2_COERCE2:%.*]], [[ZN2_COERCE3:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4)
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2
-// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8)
-// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
-// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12)
-// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: ret { , , , } [[TMP9]]
+// CPP-CHECK-NEXT: ret { , , , } [[TMP0]]
 //
 svfloat32x4_t test_svsel_f32_x4(svcount_t pn, svfloat32x4_t zn1, svfloat32x4_t zn2, svfloat32x4_t zn3, svfloat32x4_t zn4) __arm_streaming {
   return SVE_ACLE_FUNC(svsel,_f32_x4)(pn, zn1, zn2);
@@ -349,35 +151,13 @@ svfloat32x4_t test_svsel_f32_x4(svcount_t pn, svfloat32x4_t z
 // CHECK-LABEL: @test_svsel_s64_x4(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16
 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sel.x4.nxv2i64(target("aarch64.svcount") [[PN:%.*]], [[ZN1_COERCE0:%.*]], [[ZN1_COERCE1:%.*]], [[ZN1_COERCE2:%.*]], [[ZN1_COERCE3:%.*]], [[ZN2_COERCE0:%.*]], [[ZN2_COERCE1:%.*]], [[ZN2_COERCE2:%.*]], [[ZN2_COERCE3:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1
-// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2)
-// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2
-// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4)
-// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
-// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6)
-// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16
-// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
-// CHECK-NEXT: ret { , , , } [[TMP9]]
+// CHECK-NEXT: ret { , , , } [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z17test_svsel_s64_x4u11__SVCount_t11svint64x4_tS0_S0_S0_(
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16
 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sel.x4.nxv2i64(target("aarch64.svcount") [[PN:%.*]], [[ZN1_COERCE0:%.*]], [[ZN1_COERCE1:%.*]], [[ZN1_COERCE2:%.*]], [[ZN1_COERCE3:%.*]], [[ZN2_COERCE0:%.*]], [[ZN2_COERCE1:%.*]], [[ZN2_COERCE2:%.*]], [[ZN2_COERCE3:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2)
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2
-// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4)
-// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
-// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6)
-// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: ret { , , , } [[TMP9]]
+// CPP-CHECK-NEXT: ret { , , , } [[TMP0]]
 //
 svint64x4_t test_svsel_s64_x4(svcount_t pn, svint64x4_t zn1, svint64x4_t zn2, svint64x4_t zn3, svint64x4_t zn4) __arm_streaming {
   return SVE_ACLE_FUNC(svsel,_s64_x4)(pn, zn1, zn2);
@@ -385,35 +165,13 @@ svint64x4_t test_svsel_s64_x4(svcount_t pn, svint64x4_t zn1, svint64x4_t zn2, sv
 // CHECK-LABEL: @test_svsel_u64_x4(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16
 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sel.x4.nxv2i64(target("aarch64.svcount") [[PN:%.*]], [[ZN1_COERCE0:%.*]], [[ZN1_COERCE1:%.*]], [[ZN1_COERCE2:%.*]], [[ZN1_COERCE3:%.*]], [[ZN2_COERCE0:%.*]], [[ZN2_COERCE1:%.*]], [[ZN2_COERCE2:%.*]], [[ZN2_COERCE3:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1
-// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2)
-// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2
-// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4)
-// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
-// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6)
-// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16
-// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
-// CHECK-NEXT: ret { , , , } [[TMP9]]
+// CHECK-NEXT: ret { , , , } [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z17test_svsel_u64_x4u11__SVCount_t12svuint64x4_tS0_S0_S0_(
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16
 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sel.x4.nxv2i64(target("aarch64.svcount") [[PN:%.*]], [[ZN1_COERCE0:%.*]], [[ZN1_COERCE1:%.*]], [[ZN1_COERCE2:%.*]], [[ZN1_COERCE3:%.*]], [[ZN2_COERCE0:%.*]], [[ZN2_COERCE1:%.*]], [[ZN2_COERCE2:%.*]], [[ZN2_COERCE3:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2)
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2
-// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4)
-// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
-// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6)
-// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: ret { , , , } [[TMP9]]
+// CPP-CHECK-NEXT: ret { , , , } [[TMP0]]
 //
 svuint64x4_t test_svsel_u64_x4(svcount_t pn, svuint64x4_t zn1, svuint64x4_t zn2, svuint64x4_t zn3, svuint64x4_t zn4) __arm_streaming {
   return SVE_ACLE_FUNC(svsel,_u64_x4)(pn, zn1, zn2);
@@ -421,35 +179,13 @@ svuint64x4_t test_svsel_u64_x4(svcount_t pn, svuint64x4_t zn1, svuint64x4_t zn2,
 // CHECK-LABEL: @test_svsel_f64_x4(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16
 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sel.x4.nxv2f64(target("aarch64.svcount") [[PN:%.*]], [[ZN1_COERCE0:%.*]], [[ZN1_COERCE1:%.*]], [[ZN1_COERCE2:%.*]], [[ZN1_COERCE3:%.*]], [[ZN2_COERCE0:%.*]], [[ZN2_COERCE1:%.*]], [[ZN2_COERCE2:%.*]], [[ZN2_COERCE3:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1
-// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2)
-// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2
-// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 4)
-// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
-// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6)
-// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16
-// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
-// CHECK-NEXT: ret { , , , } [[TMP9]]
+// CHECK-NEXT: ret { , , , } [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z17test_svsel_f64_x4u11__SVCount_t13svfloat64x4_tS0_S0_S0_(
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16
 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sel.x4.nxv2f64(target("aarch64.svcount") [[PN:%.*]], [[ZN1_COERCE0:%.*]], [[ZN1_COERCE1:%.*]], [[ZN1_COERCE2:%.*]], [[ZN1_COERCE3:%.*]], [[ZN2_COERCE0:%.*]], [[ZN2_COERCE1:%.*]], [[ZN2_COERCE2:%.*]], [[ZN2_COERCE3:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2)
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2
-// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 4)
-// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
-// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6)
-// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: ret { , , , } [[TMP9]]
+// CPP-CHECK-NEXT: ret { , , , } [[TMP0]]
 //
 svfloat64x4_t test_svsel_f64_x4(svcount_t pn, svfloat64x4_t zn1, svfloat64x4_t zn2, svfloat64x4_t zn3, svfloat64x4_t zn4) __arm_streaming {
   return SVE_ACLE_FUNC(svsel,_f64_x4)(pn, zn1, zn2);
diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_vector_uzpx2.c
b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_vector_uzpx2.c index de605bab67cc3..9a66ee5262082 100644 --- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_vector_uzpx2.c +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_vector_uzpx2.c @@ -20,27 +20,13 @@ // CHECK-LABEL: @test_svuzp_s8_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzp.x2.nxv16i8( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z16test_svuzp_s8_x210svint8x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzp.x2.nxv16i8( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint8x2_t test_svuzp_s8_x2(svint8x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzp,_s8_x2)(zn); @@ -48,27 +34,13 @@ svint8x2_t test_svuzp_s8_x2(svint8x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzp_u8_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzp.x2.nxv16i8( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z16test_svuzp_u8_x211svuint8x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzp.x2.nxv16i8( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr 
[[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint8x2_t test_svuzp_u8_x2(svuint8x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzp,_u8_x2)(zn); @@ -78,27 +50,13 @@ svuint8x2_t test_svuzp_u8_x2(svuint8x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzp_s16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzp.x2.nxv8i16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svuzp_s16_x211svint16x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzp.x2.nxv8i16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint16x2_t test_svuzp_s16_x2(svint16x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzp,_s16_x2)(zn); @@ -106,27 +64,13 @@ svint16x2_t test_svuzp_s16_x2(svint16x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzp_u16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzp.x2.nxv8i16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svuzp_u16_x212svuint16x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzp.x2.nxv8i16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = 
load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint16x2_t test_svuzp_u16_x2(svuint16x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzp,_u16_x2)(zn); @@ -134,27 +78,13 @@ svuint16x2_t test_svuzp_u16_x2(svuint16x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzp_f16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzp.x2.nxv8f16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svuzp_f16_x213svfloat16x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzp.x2.nxv8f16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat16x2_t test_svuzp_f16_x2(svfloat16x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzp,_f16_x2)(zn); @@ -162,27 +92,13 @@ svfloat16x2_t test_svuzp_f16_x2(svfloat16x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzp_bf16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzp.x2.nxv8bf16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svuzp_bf16_x214svbfloat16x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzp.x2.nxv8bf16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr 
[[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svbfloat16x2_t test_svuzp_bf16_x2(svbfloat16x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzp,_bf16_x2)(zn); @@ -192,27 +108,13 @@ svbfloat16x2_t test_svuzp_bf16_x2(svbfloat16x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzp_s32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzp.x2.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svuzp_s32_x211svint32x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzp.x2.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint32x2_t test_svuzp_s32_x2(svint32x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzp,_s32_x2)(zn); @@ -220,27 +122,13 @@ svint32x2_t test_svuzp_s32_x2(svint32x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzp_u32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzp.x2.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svuzp_u32_x212svuint32x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzp.x2.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// 
CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint32x2_t test_svuzp_u32_x2(svuint32x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzp,_u32_x2)(zn); @@ -248,27 +136,13 @@ svuint32x2_t test_svuzp_u32_x2(svuint32x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzp_f32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzp.x2.nxv4f32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svuzp_f32_x213svfloat32x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzp.x2.nxv4f32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat32x2_t test_svuzp_f32_x2(svfloat32x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzp,_f32_x2)(zn); @@ -278,27 +152,13 @@ svfloat32x2_t test_svuzp_f32_x2(svfloat32x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzp_s64_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzp.x2.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svuzp_s64_x211svint64x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzp.x2.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( 
[[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint64x2_t test_svuzp_s64_x2(svint64x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzp,_s64_x2)(zn); @@ -306,27 +166,13 @@ svint64x2_t test_svuzp_s64_x2(svint64x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzp_u64_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzp.x2.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svuzp_u64_x212svuint64x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzp.x2.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint64x2_t test_svuzp_u64_x2(svuint64x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzp,_u64_x2)(zn); @@ -334,27 +180,13 @@ svuint64x2_t test_svuzp_u64_x2(svuint64x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzp_f64_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzp.x2.nxv2f64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svuzp_f64_x213svfloat64x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzp.x2.nxv2f64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call 
@llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat64x2_t test_svuzp_f64_x2(svfloat64x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzp,_f64_x2)(zn); @@ -364,27 +196,13 @@ svfloat64x2_t test_svuzp_f64_x2(svfloat64x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzpq_s8_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzpq.x2.nxv16i8( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svuzpq_s8_x210svint8x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzpq.x2.nxv16i8( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint8x2_t test_svuzpq_s8_x2(svint8x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzpq,_s8_x2)(zn); @@ -392,27 +210,13 @@ svint8x2_t test_svuzpq_s8_x2(svint8x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzpq_u8_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzpq.x2.nxv16i8( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svuzpq_u8_x211svuint8x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzpq.x2.nxv16i8( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: 
[[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint8x2_t test_svuzpq_u8_x2(svuint8x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzpq,_u8_x2)(zn); @@ -420,27 +224,13 @@ svuint8x2_t test_svuzpq_u8_x2(svuint8x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzpq_s16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzpq.x2.nxv8i16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svuzpq_s16_x211svint16x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzpq.x2.nxv8i16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint16x2_t test_svuzpq_s16_x2(svint16x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzpq,_s16_x2)(zn); @@ -448,27 +238,13 @@ svint16x2_t test_svuzpq_s16_x2(svint16x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzpq_u16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzpq.x2.nxv8i16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svuzpq_u16_x212svuint16x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzpq.x2.nxv8i16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , 
} [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint16x2_t test_svuzpq_u16_x2(svuint16x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzpq,_u16_x2)(zn); @@ -476,27 +252,13 @@ svuint16x2_t test_svuzpq_u16_x2(svuint16x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzpq_f16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzpq.x2.nxv8f16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svuzpq_f16_x213svfloat16x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzpq.x2.nxv8f16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat16x2_t test_svuzpq_f16_x2(svfloat16x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzpq,_f16_x2)(zn); @@ -504,27 +266,13 @@ svfloat16x2_t test_svuzpq_f16_x2(svfloat16x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzpq_bf16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzpq.x2.nxv8bf16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z19test_svuzpq_bf16_x214svbfloat16x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzpq.x2.nxv8bf16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, 
[[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svbfloat16x2_t test_svuzpq_bf16_x2(svbfloat16x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzpq,_bf16_x2)(zn); @@ -532,27 +280,13 @@ svbfloat16x2_t test_svuzpq_bf16_x2(svbfloat16x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzpq_s32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzpq.x2.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svuzpq_s32_x211svint32x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzpq.x2.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint32x2_t test_svuzpq_s32_x2(svint32x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzpq,_s32_x2)(zn); @@ -560,27 +294,13 @@ svint32x2_t test_svuzpq_s32_x2(svint32x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzpq_u32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzpq.x2.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svuzpq_u32_x212svuint32x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzpq.x2.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail 
call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint32x2_t test_svuzpq_u32_x2(svuint32x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzpq,_u32_x2)(zn); @@ -588,27 +308,13 @@ svuint32x2_t test_svuzpq_u32_x2(svuint32x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzpq_f32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzpq.x2.nxv4f32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svuzpq_f32_x213svfloat32x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzpq.x2.nxv4f32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat32x2_t test_svuzpq_f32_x2(svfloat32x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzpq,_f32_x2)(zn); @@ -616,27 +322,13 @@ svfloat32x2_t test_svuzpq_f32_x2(svfloat32x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzpq_s64_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzpq.x2.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svuzpq_s64_x211svint64x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzpq.x2.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 
0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint64x2_t test_svuzpq_s64_x2(svint64x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzpq,_s64_x2)(zn); @@ -644,27 +336,13 @@ svint64x2_t test_svuzpq_s64_x2(svint64x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzpq_u64_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzpq.x2.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svuzpq_u64_x212svuint64x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzpq.x2.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint64x2_t test_svuzpq_u64_x2(svuint64x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzpq,_u64_x2)(zn); @@ -672,27 +350,13 @@ svuint64x2_t test_svuzpq_u64_x2(svuint64x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzpq_f64_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzpq.x2.nxv2f64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svuzpq_f64_x213svfloat64x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzpq.x2.nxv2f64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CPP-CHECK-NEXT: 
[[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat64x2_t test_svuzpq_f64_x2(svfloat64x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzpq,_f64_x2)(zn); diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_vector_uzpx4.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_vector_uzpx4.c index aa210f59508b5..131928615edcd 100644 --- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_vector_uzpx4.c +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_vector_uzpx4.c @@ -20,35 +20,13 @@ // CHECK-LABEL: @test_svuzp_s8_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzp.x4.nxv16i8( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z16test_svuzp_s8_x410svint8x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzp.x4.nxv16i8( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint8x4_t test_svuzp_s8_x4(svint8x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzp,_s8_x4)(zn); @@ 
-56,35 +34,13 @@ svint8x4_t test_svuzp_s8_x4(svint8x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzp_u8_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzp.x4.nxv16i8( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z16test_svuzp_u8_x411svuint8x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzp.x4.nxv16i8( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint8x4_t test_svuzp_u8_x4(svuint8x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzp,_u8_x4)(zn); @@ -94,35 +50,13 @@ svuint8x4_t test_svuzp_u8_x4(svuint8x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzp_s16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzp.x4.nxv8i16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } 
[[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svuzp_s16_x411svint16x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzp.x4.nxv8i16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint16x4_t test_svuzp_s16_x4(svint16x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzp,_s16_x4)(zn); @@ -130,35 +64,13 @@ svint16x4_t test_svuzp_s16_x4(svint16x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzp_u16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzp.x4.nxv8i16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svuzp_u16_x412svuint16x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzp.x4.nxv8i16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: 
[[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint16x4_t test_svuzp_u16_x4(svuint16x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzp,_u16_x4)(zn); @@ -166,35 +78,13 @@ svuint16x4_t test_svuzp_u16_x4(svuint16x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzp_f16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzp.x4.nxv8f16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svuzp_f16_x413svfloat16x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzp.x4.nxv8f16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat16x4_t test_svuzp_f16_x4(svfloat16x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzp,_f16_x4)(zn); @@ -202,35 +92,13 @@ svfloat16x4_t test_svuzp_f16_x4(svfloat16x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzp_bf16_x4( // 
CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzp.x4.nxv8bf16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svuzp_bf16_x414svbfloat16x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzp.x4.nxv8bf16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svbfloat16x4_t test_svuzp_bf16_x4(svbfloat16x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzp,_bf16_x4)(zn); @@ -240,35 +108,13 @@ svbfloat16x4_t test_svuzp_bf16_x4(svbfloat16x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzp_s32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzp.x4.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call 
@llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svuzp_s32_x411svint32x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzp.x4.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint32x4_t test_svuzp_s32_x4(svint32x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzp,_s32_x4)(zn); @@ -276,35 +122,13 @@ svint32x4_t test_svuzp_s32_x4(svint32x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzp_u32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzp.x4.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svuzp_u32_x412svuint32x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzp.x4.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( 
[[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint32x4_t test_svuzp_u32_x4(svuint32x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzp,_u32_x4)(zn); @@ -312,35 +136,13 @@ svuint32x4_t test_svuzp_u32_x4(svuint32x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzp_f32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzp.x4.nxv4f32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svuzp_f32_x413svfloat32x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzp.x4.nxv4f32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat32x4_t test_svuzp_f32_x4(svfloat32x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzp,_f32_x4)(zn); @@ -350,35 +152,13 @@ svfloat32x4_t test_svuzp_f32_x4(svfloat32x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzp_s64_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , 
}, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzp.x4.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svuzp_s64_x411svint64x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzp.x4.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint64x4_t test_svuzp_s64_x4(svint64x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzp,_s64_x4)(zn); @@ -386,35 +166,13 @@ svint64x4_t test_svuzp_s64_x4(svint64x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzp_u64_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzp.x4.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: 
[[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svuzp_u64_x412svuint64x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzp.x4.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint64x4_t test_svuzp_u64_x4(svuint64x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzp,_u64_x4)(zn); @@ -422,35 +180,13 @@ svuint64x4_t test_svuzp_u64_x4(svuint64x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzp_f64_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzp.x4.nxv2f64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svuzp_f64_x413svfloat64x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzp.x4.nxv2f64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call 
@llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat64x4_t test_svuzp_f64_x4(svfloat64x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzp,_f64_x4)(zn); @@ -460,35 +196,13 @@ svfloat64x4_t test_svuzp_f64_x4(svfloat64x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzpq_s8_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzpq.x4.nxv16i8( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svuzpq_s8_x410svint8x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzpq.x4.nxv16i8( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint8x4_t test_svuzpq_s8_x4(svint8x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzpq,_s8_x4)(zn); @@ -496,35 +210,13 @@ svint8x4_t test_svuzpq_s8_x4(svint8x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzpq_u8_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzpq.x4.nxv16i8( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], 
[[ZN_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svuzpq_u8_x411svuint8x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzpq.x4.nxv16i8( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint8x4_t test_svuzpq_u8_x4(svuint8x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzpq,_u8_x4)(zn); @@ -532,35 +224,13 @@ svuint8x4_t test_svuzpq_u8_x4(svuint8x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzpq_s16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzpq.x4.nxv8i16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // 
CPP-CHECK-LABEL: @_Z18test_svuzpq_s16_x411svint16x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzpq.x4.nxv8i16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint16x4_t test_svuzpq_s16_x4(svint16x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzpq,_s16_x4)(zn); @@ -568,35 +238,13 @@ svint16x4_t test_svuzpq_s16_x4(svint16x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzpq_u16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzpq.x4.nxv8i16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svuzpq_u16_x412svuint16x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzpq.x4.nxv8i16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } 
[[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint16x4_t test_svuzpq_u16_x4(svuint16x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzpq,_u16_x4)(zn); @@ -604,35 +252,13 @@ svuint16x4_t test_svuzpq_u16_x4(svuint16x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzpq_f16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzpq.x4.nxv8f16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svuzpq_f16_x413svfloat16x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzpq.x4.nxv8f16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat16x4_t test_svuzpq_f16_x4(svfloat16x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzpq,_f16_x4)(zn); @@ -640,35 +266,13 @@ svfloat16x4_t test_svuzpq_f16_x4(svfloat16x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzpq_bf16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzpq.x4.nxv8bf16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// 
CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z19test_svuzpq_bf16_x414svbfloat16x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzpq.x4.nxv8bf16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svbfloat16x4_t test_svuzpq_bf16_x4(svbfloat16x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzpq,_bf16_x4)(zn); @@ -676,35 +280,13 @@ svbfloat16x4_t test_svuzpq_bf16_x4(svbfloat16x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzpq_s32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzpq.x4.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: 
@_Z18test_svuzpq_s32_x411svint32x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzpq.x4.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint32x4_t test_svuzpq_s32_x4(svint32x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzpq,_s32_x4)(zn); @@ -712,35 +294,13 @@ svint32x4_t test_svuzpq_s32_x4(svint32x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzpq_u32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzpq.x4.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svuzpq_u32_x412svuint32x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzpq.x4.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// 
CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint32x4_t test_svuzpq_u32_x4(svuint32x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzpq,_u32_x4)(zn); @@ -748,35 +308,13 @@ svuint32x4_t test_svuzpq_u32_x4(svuint32x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzpq_f32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzpq.x4.nxv4f32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svuzpq_f32_x413svfloat32x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzpq.x4.nxv4f32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat32x4_t test_svuzpq_f32_x4(svfloat32x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzpq,_f32_x4)(zn); @@ -784,35 +322,13 @@ svfloat32x4_t test_svuzpq_f32_x4(svfloat32x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzpq_s64_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzpq.x4.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: 
[[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svuzpq_s64_x411svint64x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzpq.x4.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint64x4_t test_svuzpq_s64_x4(svint64x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzpq,_s64_x4)(zn); @@ -820,35 +336,13 @@ svint64x4_t test_svuzpq_s64_x4(svint64x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzpq_u64_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzpq.x4.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svuzpq_u64_x412svuint64x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: 
[[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzpq.x4.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint64x4_t test_svuzpq_u64_x4(svuint64x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzpq,_u64_x4)(zn); @@ -856,35 +350,13 @@ svuint64x4_t test_svuzpq_u64_x4(svuint64x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzpq_f64_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzpq.x4.nxv2f64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svuzpq_f64_x413svfloat64x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzpq.x4.nxv2f64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], 
i64 6) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat64x4_t test_svuzpq_f64_x4(svfloat64x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzpq,_f64_x4)(zn); diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_vector_zipx2.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_vector_zipx2.c index a29c347e3197f..787b7d0b3ea1a 100644 --- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_vector_zipx2.c +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_vector_zipx2.c @@ -19,27 +19,13 @@ // CHECK-LABEL: @test_svzip_s8_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zip.x2.nxv16i8( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z16test_svzip_s8_x210svint8x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zip.x2.nxv16i8( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint8x2_t test_svzip_s8_x2(svint8x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svzip,_s8_x2)(zn); @@ -47,27 +33,13 @@ svint8x2_t test_svzip_s8_x2(svint8x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svzip_u8_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zip.x2.nxv16i8( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z16test_svzip_u8_x211svuint8x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zip.x2.nxv16i8( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = 
extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint8x2_t test_svzip_u8_x2(svuint8x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svzip,_u8_x2)(zn); @@ -77,27 +49,13 @@ svuint8x2_t test_svzip_u8_x2(svuint8x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svzip_s16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zip.x2.nxv8i16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svzip_s16_x211svint16x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zip.x2.nxv8i16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint16x2_t test_svzip_s16_x2(svint16x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svzip,_s16_x2)(zn); @@ -105,27 +63,13 @@ svint16x2_t test_svzip_s16_x2(svint16x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svzip_u16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zip.x2.nxv8i16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svzip_u16_x212svuint16x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zip.x2.nxv8i16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CPP-CHECK-NEXT: 
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i16> @llvm.vector.insert.nxv16i16.nxv8i16(<vscale x 16 x i16> poison, <vscale x 8 x i16> [[TMP1]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], 1
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 16 x i16> @llvm.vector.insert.nxv16i16.nxv8i16(<vscale x 16 x i16> [[TMP2]], <vscale x 8 x i16> [[TMP3]], i64 8)
-// CPP-CHECK-NEXT:    store <vscale x 16 x i16> [[TMP4]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT:    [[TMP5:%.*]] = load { <vscale x 8 x i16>, <vscale x 8 x i16> }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT:    ret { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP5]]
+// CPP-CHECK-NEXT:    ret { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]]
 //
 svuint16x2_t test_svzip_u16_x2(svuint16x2_t zn) __arm_streaming {
   return SVE_ACLE_FUNC(svzip,_u16_x2)(zn);
@@ -133,27 +77,13 @@ svuint16x2_t test_svzip_u16_x2(svuint16x2_t zn) __arm_streaming {
 // CHECK-LABEL: @test_svzip_f16_x2(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 8 x half>, <vscale x 8 x half> }, align 16
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.zip.x2.nxv8f16(<vscale x 8 x half> [[ZN_COERCE0:%.*]], <vscale x 8 x half> [[ZN_COERCE1:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half> } [[TMP0]], 0
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x half> @llvm.vector.insert.nxv16f16.nxv8f16(<vscale x 16 x half> poison, <vscale x 8 x half> [[TMP1]], i64 0)
-// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half> } [[TMP0]], 1
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 16 x half> @llvm.vector.insert.nxv16f16.nxv8f16(<vscale x 16 x half> [[TMP2]], <vscale x 8 x half> [[TMP3]], i64 8)
-// CHECK-NEXT:    store <vscale x 16 x half> [[TMP4]], ptr [[RETVAL]], align 16
-// CHECK-NEXT:    [[TMP5:%.*]] = load { <vscale x 8 x half>, <vscale x 8 x half> }, ptr [[RETVAL]], align 16
-// CHECK-NEXT:    ret { <vscale x 8 x half>, <vscale x 8 x half> } [[TMP5]]
+// CHECK-NEXT:    ret { <vscale x 8 x half>, <vscale x 8 x half> } [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z17test_svzip_f16_x213svfloat16x2_t(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 8 x half>, <vscale x 8 x half> }, align 16
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.zip.x2.nxv8f16(<vscale x 8 x half> [[ZN_COERCE0:%.*]], <vscale x 8 x half> [[ZN_COERCE1:%.*]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half> } [[TMP0]], 0
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x half> @llvm.vector.insert.nxv16f16.nxv8f16(<vscale x 16 x half> poison, <vscale x 8 x half> [[TMP1]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half> } [[TMP0]], 1
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 16 x half> @llvm.vector.insert.nxv16f16.nxv8f16(<vscale x 16 x half> [[TMP2]], <vscale x 8 x half> [[TMP3]], i64 8)
-// CPP-CHECK-NEXT:    store <vscale x 16 x half> [[TMP4]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT:    [[TMP5:%.*]] = load { <vscale x 8 x half>, <vscale x 8 x half> }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT:    ret { <vscale x 8 x half>, <vscale x 8 x half> } [[TMP5]]
+// CPP-CHECK-NEXT:    ret { <vscale x 8 x half>, <vscale x 8 x half> } [[TMP0]]
 //
 svfloat16x2_t test_svzip_f16_x2(svfloat16x2_t zn) __arm_streaming {
   return SVE_ACLE_FUNC(svzip,_f16_x2)(zn);
@@ -161,27 +91,13 @@ svfloat16x2_t test_svzip_f16_x2(svfloat16x2_t zn) __arm_streaming {
 // CHECK-LABEL: @test_svzip_bf16_x2(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> }, align 16
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.zip.x2.nxv8bf16(<vscale x 8 x bfloat> [[ZN_COERCE0:%.*]], <vscale x 8 x bfloat> [[ZN_COERCE1:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP0]], 0
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x bfloat> @llvm.vector.insert.nxv16bf16.nxv8bf16(<vscale x 16 x bfloat> poison, <vscale x 8 x bfloat> [[TMP1]], i64 0)
-// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP0]], 1
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 16 x bfloat> @llvm.vector.insert.nxv16bf16.nxv8bf16(<vscale x 16 x bfloat> [[TMP2]], <vscale x 8 x bfloat> [[TMP3]], i64 8)
-// CHECK-NEXT:    store <vscale x 16 x bfloat> [[TMP4]], ptr [[RETVAL]], align 16
-// CHECK-NEXT:    [[TMP5:%.*]] = load { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> }, ptr [[RETVAL]], align 16
-// CHECK-NEXT:    ret { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP5]]
+// CHECK-NEXT:    ret { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z18test_svzip_bf16_x214svbfloat16x2_t(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> }, align 16
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.zip.x2.nxv8bf16(<vscale x 8 x bfloat>
[[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svbfloat16x2_t test_svzip_bf16_x2(svbfloat16x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svzip,_bf16_x2)(zn); @@ -191,27 +107,13 @@ svbfloat16x2_t test_svzip_bf16_x2(svbfloat16x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svzip_s32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zip.x2.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svzip_s32_x211svint32x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zip.x2.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint32x2_t test_svzip_s32_x2(svint32x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svzip,_s32_x2)(zn); @@ -219,27 +121,13 @@ svint32x2_t test_svzip_s32_x2(svint32x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svzip_u32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zip.x2.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svzip_u32_x212svuint32x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , 
} @llvm.aarch64.sve.zip.x2.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint32x2_t test_svzip_u32_x2(svuint32x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svzip,_u32_x2)(zn); @@ -247,27 +135,13 @@ svuint32x2_t test_svzip_u32_x2(svuint32x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svzip_f32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zip.x2.nxv4f32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svzip_f32_x213svfloat32x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zip.x2.nxv4f32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat32x2_t test_svzip_f32_x2(svfloat32x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svzip,_f32_x2)(zn); @@ -277,27 +151,13 @@ svfloat32x2_t test_svzip_f32_x2(svfloat32x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svzip_s64_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zip.x2.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svzip_s64_x211svint64x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: 
[[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zip.x2.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint64x2_t test_svzip_s64_x2(svint64x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svzip,_s64_x2)(zn); @@ -305,27 +165,13 @@ svint64x2_t test_svzip_s64_x2(svint64x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svzip_u64_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zip.x2.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svzip_u64_x212svuint64x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zip.x2.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint64x2_t test_svzip_u64_x2(svuint64x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svzip,_u64_x2)(zn); @@ -333,27 +179,13 @@ svuint64x2_t test_svzip_u64_x2(svuint64x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svzip_f64_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zip.x2.nxv2f64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svzip_f64_x213svfloat64x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 
16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zip.x2.nxv2f64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat64x2_t test_svzip_f64_x2(svfloat64x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svzip,_f64_x2)(zn); @@ -363,27 +195,13 @@ svfloat64x2_t test_svzip_f64_x2(svfloat64x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svzipq_s8_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zipq.x2.nxv16i8( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svzipq_s8_x210svint8x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zipq.x2.nxv16i8( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint8x2_t test_svzipq_s8_x2(svint8x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svzipq,_s8_x2)(zn); @@ -391,27 +209,13 @@ svint8x2_t test_svzipq_s8_x2(svint8x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svzipq_u8_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zipq.x2.nxv16i8( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svzipq_u8_x211svuint8x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] 
= alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zipq.x2.nxv16i8( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint8x2_t test_svzipq_u8_x2(svuint8x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svzipq,_u8_x2)(zn); @@ -419,27 +223,13 @@ svuint8x2_t test_svzipq_u8_x2(svuint8x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svzipq_s16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zipq.x2.nxv8i16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svzipq_s16_x211svint16x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zipq.x2.nxv8i16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint16x2_t test_svzipq_s16_x2(svint16x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svzipq,_s16_x2)(zn); @@ -447,27 +237,13 @@ svint16x2_t test_svzipq_s16_x2(svint16x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svzipq_u16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zipq.x2.nxv8i16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svzipq_u16_x212svuint16x2_t( // CPP-CHECK-NEXT: entry: 
-// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zipq.x2.nxv8i16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint16x2_t test_svzipq_u16_x2(svuint16x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svzipq,_u16_x2)(zn); @@ -475,27 +251,13 @@ svuint16x2_t test_svzipq_u16_x2(svuint16x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svzipq_f16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zipq.x2.nxv8f16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svzipq_f16_x213svfloat16x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zipq.x2.nxv8f16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat16x2_t test_svzipq_f16_x2(svfloat16x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svzipq,_f16_x2)(zn); @@ -503,27 +265,13 @@ svfloat16x2_t test_svzipq_f16_x2(svfloat16x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svzipq_bf16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zipq.x2.nxv8bf16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: 
@_Z19test_svzipq_bf16_x214svbfloat16x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zipq.x2.nxv8bf16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svbfloat16x2_t test_svzipq_bf16_x2(svbfloat16x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svzipq,_bf16_x2)(zn); @@ -531,27 +279,13 @@ svbfloat16x2_t test_svzipq_bf16_x2(svbfloat16x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svzipq_s32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zipq.x2.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svzipq_s32_x211svint32x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zipq.x2.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint32x2_t test_svzipq_s32_x2(svint32x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svzipq,_s32_x2)(zn); @@ -559,27 +293,13 @@ svint32x2_t test_svzipq_s32_x2(svint32x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svzipq_u32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zipq.x2.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// 
CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svzipq_u32_x212svuint32x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zipq.x2.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint32x2_t test_svzipq_u32_x2(svuint32x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svzipq,_u32_x2)(zn); @@ -587,27 +307,13 @@ svuint32x2_t test_svzipq_u32_x2(svuint32x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svzipq_f32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zipq.x2.nxv4f32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svzipq_f32_x213svfloat32x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zipq.x2.nxv4f32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat32x2_t test_svzipq_f32_x2(svfloat32x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svzipq,_f32_x2)(zn); @@ -615,27 +321,13 @@ svfloat32x2_t test_svzipq_f32_x2(svfloat32x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svzipq_s64_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zipq.x2.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 
16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svzipq_s64_x211svint64x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zipq.x2.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint64x2_t test_svzipq_s64_x2(svint64x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svzipq,_s64_x2)(zn); @@ -643,27 +335,13 @@ svint64x2_t test_svzipq_s64_x2(svint64x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svzipq_u64_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zipq.x2.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svzipq_u64_x212svuint64x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zipq.x2.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint64x2_t test_svzipq_u64_x2(svuint64x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svzipq,_u64_x2)(zn); @@ -671,27 +349,13 @@ svuint64x2_t test_svzipq_u64_x2(svuint64x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svzipq_f64_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zipq.x2.nxv2f64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = 
load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svzipq_f64_x213svfloat64x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zipq.x2.nxv2f64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat64x2_t test_svzipq_f64_x2(svfloat64x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svzipq,_f64_x2)(zn); diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_vector_zipx4.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_vector_zipx4.c index be40ecb4bcaa3..9bea471bc9837 100644 --- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_vector_zipx4.c +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_vector_zipx4.c @@ -19,35 +19,13 @@ // CHECK-LABEL: @test_svzip_s8_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.zip.x4.nxv16i8( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z16test_svzip_s8_x410svint8x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.zip.x4.nxv16i8( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail 
call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint8x4_t test_svzip_s8_x4(svint8x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svzip,_s8_x4)(zn); @@ -55,35 +33,13 @@ svint8x4_t test_svzip_s8_x4(svint8x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svzip_u8_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.zip.x4.nxv16i8( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z16test_svzip_u8_x411svuint8x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.zip.x4.nxv16i8( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint8x4_t test_svzip_u8_x4(svuint8x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svzip,_u8_x4)(zn); @@ -93,35 +49,13 @@ svuint8x4_t test_svzip_u8_x4(svuint8x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svzip_s16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.zip.x4.nxv8i16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// 
CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svzip_s16_x411svint16x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.zip.x4.nxv8i16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint16x4_t test_svzip_s16_x4(svint16x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svzip,_s16_x4)(zn); @@ -129,35 +63,13 @@ svint16x4_t test_svzip_s16_x4(svint16x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svzip_u16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.zip.x4.nxv8i16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svzip_u16_x412svuint16x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , 
} @llvm.aarch64.sve.zip.x4.nxv8i16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint16x4_t test_svzip_u16_x4(svuint16x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svzip,_u16_x4)(zn); @@ -165,35 +77,13 @@ svuint16x4_t test_svzip_u16_x4(svuint16x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svzip_f16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.zip.x4.nxv8f16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svzip_f16_x413svfloat16x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.zip.x4.nxv8f16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: 
[[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat16x4_t test_svzip_f16_x4(svfloat16x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svzip,_f16_x4)(zn); @@ -201,35 +91,13 @@ svfloat16x4_t test_svzip_f16_x4(svfloat16x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svzip_bf16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.zip.x4.nxv8bf16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svzip_bf16_x414svbfloat16x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.zip.x4.nxv8bf16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svbfloat16x4_t test_svzip_bf16_x4(svbfloat16x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svzip,_bf16_x4)(zn); @@ -239,35 +107,13 @@ svbfloat16x4_t test_svzip_bf16_x4(svbfloat16x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svzip_s32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.zip.x4.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: 
[[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svzip_s32_x411svint32x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.zip.x4.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint32x4_t test_svzip_s32_x4(svint32x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svzip,_s32_x4)(zn); @@ -275,35 +121,13 @@ svint32x4_t test_svzip_s32_x4(svint32x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svzip_u32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.zip.x4.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svzip_u32_x412svuint32x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.zip.x4.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], 
[[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint32x4_t test_svzip_u32_x4(svuint32x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svzip,_u32_x4)(zn); @@ -311,35 +135,13 @@ svuint32x4_t test_svzip_u32_x4(svuint32x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svzip_f32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.zip.x4.nxv4f32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svzip_f32_x413svfloat32x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.zip.x4.nxv4f32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: 
ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat32x4_t test_svzip_f32_x4(svfloat32x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svzip,_f32_x4)(zn); @@ -349,35 +151,13 @@ svfloat32x4_t test_svzip_f32_x4(svfloat32x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svzip_s64_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.zip.x4.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svzip_s64_x411svint64x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.zip.x4.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint64x4_t test_svzip_s64_x4(svint64x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svzip,_s64_x4)(zn); @@ -385,35 +165,13 @@ svint64x4_t test_svzip_s64_x4(svint64x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svzip_u64_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.zip.x4.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue 
{ , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svzip_u64_x412svuint64x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.zip.x4.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint64x4_t test_svzip_u64_x4(svuint64x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svzip,_u64_x4)(zn); @@ -421,35 +179,13 @@ svuint64x4_t test_svzip_u64_x4(svuint64x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svzip_f64_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.zip.x4.nxv2f64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svzip_f64_x413svfloat64x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.zip.x4.nxv2f64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = 
tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat64x4_t test_svzip_f64_x4(svfloat64x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svzip,_f64_x4)(zn); @@ -459,35 +195,13 @@ svfloat64x4_t test_svzip_f64_x4(svfloat64x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svzipq_s8_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.zipq.x4.nxv16i8( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svzipq_s8_x410svint8x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.zipq.x4.nxv16i8( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint8x4_t test_svzipq_s8_x4(svint8x4_t zn) __arm_streaming { return 
SVE_ACLE_FUNC(svzipq,_s8_x4)(zn); @@ -495,35 +209,13 @@ svint8x4_t test_svzipq_s8_x4(svint8x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svzipq_u8_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.zipq.x4.nxv16i8( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svzipq_u8_x411svuint8x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.zipq.x4.nxv16i8( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint8x4_t test_svzipq_u8_x4(svuint8x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svzipq,_u8_x4)(zn); @@ -531,35 +223,13 @@ svuint8x4_t test_svzipq_u8_x4(svuint8x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svzipq_s16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.zipq.x4.nxv8i16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// 
CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svzipq_s16_x411svint16x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.zipq.x4.nxv8i16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint16x4_t test_svzipq_s16_x4(svint16x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svzipq,_s16_x4)(zn); @@ -567,35 +237,13 @@ svint16x4_t test_svzipq_s16_x4(svint16x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svzipq_u16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.zipq.x4.nxv8i16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svzipq_u16_x412svuint16x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.zipq.x4.nxv8i16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = 
extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint16x4_t test_svzipq_u16_x4(svuint16x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svzipq,_u16_x4)(zn); @@ -603,35 +251,13 @@ svuint16x4_t test_svzipq_u16_x4(svuint16x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svzipq_f16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.zipq.x4.nxv8f16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svzipq_f16_x413svfloat16x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.zipq.x4.nxv8f16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat16x4_t test_svzipq_f16_x4(svfloat16x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svzipq,_f16_x4)(zn); @@ -639,35 +265,13 @@ svfloat16x4_t 
test_svzipq_f16_x4(svfloat16x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svzipq_bf16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.zipq.x4.nxv8bf16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z19test_svzipq_bf16_x414svbfloat16x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.zipq.x4.nxv8bf16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svbfloat16x4_t test_svzipq_bf16_x4(svbfloat16x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svzipq,_bf16_x4)(zn); @@ -675,35 +279,13 @@ svbfloat16x4_t test_svzipq_bf16_x4(svbfloat16x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svzipq_s32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.zipq.x4.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: 
[[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svzipq_s32_x411svint32x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.zipq.x4.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint32x4_t test_svzipq_s32_x4(svint32x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svzipq,_s32_x4)(zn); @@ -711,35 +293,13 @@ svint32x4_t test_svzipq_s32_x4(svint32x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svzipq_u32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.zipq.x4.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svzipq_u32_x412svuint32x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.zipq.x4.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , 
, } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint32x4_t test_svzipq_u32_x4(svuint32x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svzipq,_u32_x4)(zn); @@ -747,35 +307,13 @@ svuint32x4_t test_svzipq_u32_x4(svuint32x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svzipq_f32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.zipq.x4.nxv4f32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svzipq_f32_x413svfloat32x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.zipq.x4.nxv4f32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat32x4_t test_svzipq_f32_x4(svfloat32x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svzipq,_f32_x4)(zn); @@ -783,35 +321,13 @@ svfloat32x4_t test_svzipq_f32_x4(svfloat32x4_t zn) 
__arm_streaming { // CHECK-LABEL: @test_svzipq_s64_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.zipq.x4.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svzipq_s64_x411svint64x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.zipq.x4.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint64x4_t test_svzipq_s64_x4(svint64x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svzipq,_s64_x4)(zn); @@ -819,35 +335,13 @@ svint64x4_t test_svzipq_s64_x4(svint64x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svzipq_u64_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.zipq.x4.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call 
@llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svzipq_u64_x412svuint64x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.zipq.x4.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint64x4_t test_svzipq_u64_x4(svuint64x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svzipq,_u64_x4)(zn); @@ -855,35 +349,13 @@ svuint64x4_t test_svzipq_u64_x4(svuint64x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svzipq_f64_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.zipq.x4.nxv2f64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svzipq_f64_x413svfloat64x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.zipq.x4.nxv2f64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( 
[[TMP2]], [[TMP3]], i64 2)
-// CPP-CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2
-// CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 4)
-// CPP-CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
-// CPP-CHECK-NEXT:    [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6)
-// CPP-CHECK-NEXT:    store [[TMP8]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT:    [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT:    ret { , , , } [[TMP9]]
+// CPP-CHECK-NEXT:    ret { , , , } [[TMP0]]
 //
 svfloat64x4_t test_svzipq_f64_x4(svfloat64x4_t zn) __arm_streaming {
   return SVE_ACLE_FUNC(svzipq,_f64_x4)(zn);
diff --git a/clang/test/CodeGen/aarch64-sme2p1-intrinsics/acle_sme2p1_movaz.c b/clang/test/CodeGen/aarch64-sme2p1-intrinsics/acle_sme2p1_movaz.c
index 77b02b4c4708f..7fa2249827c4e 100644
--- a/clang/test/CodeGen/aarch64-sme2p1-intrinsics/acle_sme2p1_movaz.c
+++ b/clang/test/CodeGen/aarch64-sme2p1-intrinsics/acle_sme2p1_movaz.c
@@ -11,28 +11,14 @@
 // CHECK-LABEL: define dso_local { , } @test_svreadz_hor_za8_s8_x2(
 // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0:[0-9]+]] {
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[RETVAL:%.*]] = alloca { , }, align 16
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.horiz.x2.nxv16i8(i32 0, i32 [[SLICE]])
-// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0)
-// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
-// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z26test_svreadz_hor_za8_u8_x2j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.horiz.x2.nxv16i8(i32 0, i32 [[SLICE]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint8x2_t test_svreadz_hor_za8_u8_x2(uint32_t slice) __arm_streaming __arm_inout("za") { @@ -73,28 +45,14 @@ svuint8x2_t test_svreadz_hor_za8_u8_x2(uint32_t slice) __arm_streaming __arm_ino // CHECK-LABEL: define dso_local { , } @test_svreadz_hor_za16_s16_x2( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.horiz.x2.nxv8i16(i32 0, i32 [[SLICE]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z28test_svreadz_hor_za16_s16_x2j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.horiz.x2.nxv8i16(i32 0, i32 [[SLICE]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint16x2_t test_svreadz_hor_za16_s16_x2(uint32_t slice) __arm_streaming __arm_inout("za") { @@ -104,28 +62,14 @@ svint16x2_t test_svreadz_hor_za16_s16_x2(uint32_t slice) __arm_streaming __arm_i // CHECK-LABEL: define dso_local { , } @test_svreadz_hor_za16_u16_x2( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: 
[[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.horiz.x2.nxv8i16(i32 1, i32 [[SLICE]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z28test_svreadz_hor_za16_u16_x2j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.horiz.x2.nxv8i16(i32 1, i32 [[SLICE]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint16x2_t test_svreadz_hor_za16_u16_x2(uint32_t slice) __arm_streaming __arm_inout("za") { @@ -135,28 +79,14 @@ svuint16x2_t test_svreadz_hor_za16_u16_x2(uint32_t slice) __arm_streaming __arm_ // CHECK-LABEL: define dso_local { , } @test_svreadz_hor_za16_f16_x2( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.horiz.x2.nxv8f16(i32 0, i32 [[SLICE]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z28test_svreadz_hor_za16_f16_x2j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.horiz.x2.nxv8f16(i32 0, i32 [[SLICE]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat16x2_t test_svreadz_hor_za16_f16_x2(uint32_t slice) __arm_streaming 
__arm_inout("za") { @@ -166,28 +96,14 @@ svfloat16x2_t test_svreadz_hor_za16_f16_x2(uint32_t slice) __arm_streaming __arm // CHECK-LABEL: define dso_local { , } @test_svreadz_hor_za16_bf16_x2( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.horiz.x2.nxv8bf16(i32 1, i32 [[SLICE]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z29test_svreadz_hor_za16_bf16_x2j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.horiz.x2.nxv8bf16(i32 1, i32 [[SLICE]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svbfloat16x2_t test_svreadz_hor_za16_bf16_x2(uint32_t slice) __arm_streaming __arm_inout("za") { @@ -198,28 +114,14 @@ svbfloat16x2_t test_svreadz_hor_za16_bf16_x2(uint32_t slice) __arm_streaming __a // CHECK-LABEL: define dso_local { , } @test_svreadz_hor_za32_s32_x2( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.horiz.x2.nxv4i32(i32 0, i32 [[SLICE]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z28test_svreadz_hor_za32_s32_x2j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.horiz.x2.nxv4i32(i32 0, i32 [[SLICE]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call 
@llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint32x2_t test_svreadz_hor_za32_s32_x2(uint32_t slice) __arm_streaming __arm_inout("za") { @@ -229,28 +131,14 @@ svint32x2_t test_svreadz_hor_za32_s32_x2(uint32_t slice) __arm_streaming __arm_i // CHECK-LABEL: define dso_local { , } @test_svreadz_hor_za32_u32_x2( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.horiz.x2.nxv4i32(i32 2, i32 [[SLICE]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z28test_svreadz_hor_za32_u32_x2j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.horiz.x2.nxv4i32(i32 2, i32 [[SLICE]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint32x2_t test_svreadz_hor_za32_u32_x2(uint32_t slice) __arm_streaming __arm_inout("za") { @@ -260,28 +148,14 @@ svuint32x2_t test_svreadz_hor_za32_u32_x2(uint32_t slice) __arm_streaming __arm_ // CHECK-LABEL: define dso_local { , } @test_svreadz_hor_za32_f32_x2( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.horiz.x2.nxv4f32(i32 3, i32 [[SLICE]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z28test_svreadz_hor_za32_f32_x2j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } 
@llvm.aarch64.sme.readz.horiz.x2.nxv4f32(i32 3, i32 [[SLICE]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP0]], 0
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x float> @llvm.vector.insert.nxv8f32.nxv4f32(<vscale x 8 x float> poison, <vscale x 4 x float> [[TMP1]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP0]], 1
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 8 x float> @llvm.vector.insert.nxv8f32.nxv4f32(<vscale x 8 x float> [[TMP2]], <vscale x 4 x float> [[TMP3]], i64 4)
-// CPP-CHECK-NEXT: store <vscale x 8 x float> [[TMP4]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { <vscale x 4 x float>, <vscale x 4 x float> }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: ret { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP5]]
+// CPP-CHECK-NEXT: ret { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP0]]
 //
 svfloat32x2_t test_svreadz_hor_za32_f32_x2(uint32_t slice) __arm_streaming __arm_inout("za")
 {
@@ -291,28 +165,14 @@ svfloat32x2_t test_svreadz_hor_za32_f32_x2(uint32_t slice) __arm_streaming __arm
 // CHECK-LABEL: define dso_local { <vscale x 2 x i64>, <vscale x 2 x i64> } @test_svreadz_hor_za64_s64_x2(
 // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca { <vscale x 2 x i64>, <vscale x 2 x i64> }, align 16
 // CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sme.readz.horiz.x2.nxv2i64(i32 0, i32 [[SLICE]])
-// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]], 0
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 4 x i64> @llvm.vector.insert.nxv4i64.nxv2i64(<vscale x 4 x i64> poison, <vscale x 2 x i64> [[TMP1]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]], 1
-// CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 4 x i64> @llvm.vector.insert.nxv4i64.nxv2i64(<vscale x 4 x i64> [[TMP2]], <vscale x 2 x i64> [[TMP3]], i64 2)
-// CHECK-NEXT: store <vscale x 4 x i64> [[TMP4]], ptr [[RETVAL]], align 16
-// CHECK-NEXT: [[TMP5:%.*]] = load { <vscale x 2 x i64>, <vscale x 2 x i64> }, ptr [[RETVAL]], align 16
-// CHECK-NEXT: ret { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP5]]
+// CHECK-NEXT: ret { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]]
 //
 // CPP-CHECK-LABEL: define dso_local { <vscale x 2 x i64>, <vscale x 2 x i64> } @_Z28test_svreadz_hor_za64_s64_x2j(
 // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] {
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { <vscale x 2 x i64>, <vscale x 2 x i64> }, align 16
 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sme.readz.horiz.x2.nxv2i64(i32 0, i32 [[SLICE]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]], 0
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 4 x i64> @llvm.vector.insert.nxv4i64.nxv2i64(<vscale x 4 x i64> poison, <vscale x 2 x i64> [[TMP1]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]], 1
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 4 x i64> @llvm.vector.insert.nxv4i64.nxv2i64(<vscale x 4 x i64> [[TMP2]], <vscale x 2 x i64> [[TMP3]], i64 2)
-// CPP-CHECK-NEXT: store <vscale x 4 x i64> [[TMP4]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { <vscale x 2 x i64>, <vscale x 2 x i64> }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: ret { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP5]]
+// CPP-CHECK-NEXT: ret { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]]
 //
 svint64x2_t test_svreadz_hor_za64_s64_x2(uint32_t slice) __arm_streaming __arm_inout("za")
 {
@@ -322,28 +182,14 @@ svint64x2_t test_svreadz_hor_za64_s64_x2(uint32_t slice) __arm_streaming __arm_i
 // CHECK-LABEL: define dso_local { <vscale x 2 x i64>, <vscale x 2 x i64> } @test_svreadz_hor_za64_u64_x2(
 // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca { <vscale x 2 x i64>, <vscale x 2 x i64> }, align 16
 // CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sme.readz.horiz.x2.nxv2i64(i32 4, i32 [[SLICE]])
-// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]], 0
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 4 x i64> @llvm.vector.insert.nxv4i64.nxv2i64(<vscale x 4 x i64> poison, <vscale x 2 x i64> [[TMP1]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]], 1
-// CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 4 x i64> @llvm.vector.insert.nxv4i64.nxv2i64(<vscale x 4 x i64> [[TMP2]], <vscale x 2 x i64> [[TMP3]], i64 2)
-// CHECK-NEXT: store <vscale x 4 x i64> [[TMP4]], ptr [[RETVAL]], align 16
-// CHECK-NEXT: [[TMP5:%.*]] = load { <vscale x 2 x i64>, <vscale x 2 x i64> }, ptr [[RETVAL]], align 16
-// CHECK-NEXT: ret { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP5]]
+// CHECK-NEXT: ret { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]]
 //
 // CPP-CHECK-LABEL: define dso_local { <vscale x 2 x i64>, <vscale x 2 x i64> } @_Z28test_svreadz_hor_za64_u64_x2j(
 // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] {
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { <vscale x 2 x i64>, <vscale x 2 x i64> }, align 16
 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sme.readz.horiz.x2.nxv2i64(i32 4, i32 [[SLICE]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]], 0
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 4 x i64> @llvm.vector.insert.nxv4i64.nxv2i64(<vscale x 4 x i64> poison, <vscale x 2 x i64> [[TMP1]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]], 1
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 4 x i64> @llvm.vector.insert.nxv4i64.nxv2i64(<vscale x 4 x i64> [[TMP2]], <vscale x 2 x i64> [[TMP3]], i64 2)
-// CPP-CHECK-NEXT: store <vscale x 4 x i64> [[TMP4]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { <vscale x 2 x i64>, <vscale x 2 x i64> }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: ret { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP5]]
+// CPP-CHECK-NEXT: ret { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]]
 //
 svuint64x2_t test_svreadz_hor_za64_u64_x2(uint32_t slice) __arm_streaming __arm_inout("za")
 {
@@ -353,28 +199,14 @@ svuint64x2_t test_svreadz_hor_za64_u64_x2(uint32_t slice) __arm_streaming __arm_
 // CHECK-LABEL: define dso_local { <vscale x 2 x double>, <vscale x 2 x double> } @test_svreadz_hor_za64_f64_x2(
 // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca { <vscale x 2 x double>, <vscale x 2 x double> }, align 16
 // CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sme.readz.horiz.x2.nxv2f64(i32 7, i32 [[SLICE]])
-// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } [[TMP0]], 0
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 4 x double> @llvm.vector.insert.nxv4f64.nxv2f64(<vscale x 4 x double> poison, <vscale x 2 x double> [[TMP1]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } [[TMP0]], 1
-// CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 4 x double> @llvm.vector.insert.nxv4f64.nxv2f64(<vscale x 4 x double> [[TMP2]], <vscale x 2 x double> [[TMP3]], i64 2)
-// CHECK-NEXT: store <vscale x 4 x double> [[TMP4]], ptr [[RETVAL]], align 16
-// CHECK-NEXT: [[TMP5:%.*]] = load { <vscale x 2 x double>, <vscale x 2 x double> }, ptr [[RETVAL]], align 16
-// CHECK-NEXT: ret { <vscale x 2 x double>, <vscale x 2 x double> } [[TMP5]]
+// CHECK-NEXT: ret { <vscale x 2 x double>, <vscale x 2 x double> } [[TMP0]]
 //
 // CPP-CHECK-LABEL: define dso_local { <vscale x 2 x double>, <vscale x 2 x double> } @_Z28test_svreadz_hor_za64_f64_x2j(
 // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] {
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { <vscale x 2 x double>, <vscale x 2 x double> }, align 16
 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sme.readz.horiz.x2.nxv2f64(i32 7, i32 [[SLICE]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } [[TMP0]], 0
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 4 x double> @llvm.vector.insert.nxv4f64.nxv2f64(<vscale x 4 x double> poison, <vscale x 2 x double> [[TMP1]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } [[TMP0]], 1
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 4 x double> @llvm.vector.insert.nxv4f64.nxv2f64(<vscale x 4 x double> [[TMP2]], <vscale x 2 x double> [[TMP3]], i64 2)
-// CPP-CHECK-NEXT: store <vscale x 4 x double> [[TMP4]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { <vscale x 2 x double>, <vscale x 2 x double> }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: ret { <vscale x 2 x double>, <vscale x 2 x double> } [[TMP5]]
+// CPP-CHECK-NEXT: ret { <vscale x 2 x double>, <vscale x 2 x double> } [[TMP0]]
 //
 svfloat64x2_t test_svreadz_hor_za64_f64_x2(uint32_t slice) __arm_streaming __arm_inout("za")
 {
@@ -389,28 +221,14 @@ svfloat64x2_t test_svreadz_hor_za64_f64_x2(uint32_t slice) __arm_streaming __arm
 // CHECK-LABEL: define dso_local { <vscale x 16 x i8>, <vscale x 16 x i8> } @test_svreadz_ver_za8_s8_x2(
 // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca { <vscale x 16 x i8>, <vscale x 16 x i8> }, align 16
 // CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sme.readz.vert.x2.nxv16i8(i32 0, i32 [[SLICE]])
-// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 0
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> poison, <vscale x 16 x i8> [[TMP1]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 1
-// CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]], i64 16)
-// CHECK-NEXT: store <vscale x 32 x i8> [[TMP4]], ptr [[RETVAL]], align 16
-// CHECK-NEXT: [[TMP5:%.*]] = load { <vscale x 16 x i8>, <vscale x 16 x i8> }, ptr [[RETVAL]], align 16
-// CHECK-NEXT: ret { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP5]]
+// CHECK-NEXT: ret { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]]
 //
 // CPP-CHECK-LABEL: define dso_local { <vscale x 16 x i8>, <vscale x 16 x i8> } @_Z26test_svreadz_ver_za8_s8_x2j(
 // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] {
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { <vscale x 16 x i8>, <vscale x 16 x i8> }, align 16
 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sme.readz.vert.x2.nxv16i8(i32 0, i32 [[SLICE]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 0
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> poison, <vscale x 16 x i8> [[TMP1]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 1
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]], i64 16)
-// CPP-CHECK-NEXT: store <vscale x 32 x i8> [[TMP4]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { <vscale x 16 x i8>, <vscale x 16 x i8> }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: ret { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP5]]
+// CPP-CHECK-NEXT: ret { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]]
 //
 svint8x2_t test_svreadz_ver_za8_s8_x2(uint32_t slice) __arm_streaming __arm_inout("za")
 {
@@ -420,28 +238,14 @@ svint8x2_t test_svreadz_ver_za8_s8_x2(uint32_t slice) __arm_streaming __arm_inou
 // CHECK-LABEL: define dso_local { <vscale x 16 x i8>, <vscale x 16 x i8> } @test_svreadz_ver_za8_u8_x2(
 // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca { <vscale x 16 x i8>, <vscale x 16 x i8> }, align 16
 // CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sme.readz.vert.x2.nxv16i8(i32 0, i32 [[SLICE]])
-// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 0
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> poison, <vscale x 16 x i8> [[TMP1]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 1
-// CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]], i64 16)
-// CHECK-NEXT: store <vscale x 32 x i8> [[TMP4]], ptr [[RETVAL]], align 16
-// CHECK-NEXT: [[TMP5:%.*]] = load { <vscale x 16 x i8>, <vscale x 16 x i8> }, ptr [[RETVAL]], align 16
-// CHECK-NEXT: ret { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP5]]
+// CHECK-NEXT: ret { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]]
 //
 // CPP-CHECK-LABEL: define dso_local { <vscale x 16 x i8>, <vscale x 16 x i8> } @_Z26test_svreadz_ver_za8_u8_x2j(
 // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] {
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { <vscale x 16 x i8>, <vscale x 16 x i8> }, align 16
 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sme.readz.vert.x2.nxv16i8(i32 0, i32 [[SLICE]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 0
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> poison, <vscale x 16 x i8> [[TMP1]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 1
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]], i64 16)
-// CPP-CHECK-NEXT: store <vscale x 32 x i8> [[TMP4]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { <vscale x 16 x i8>, <vscale x 16 x i8> }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: ret { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP5]]
+// CPP-CHECK-NEXT: ret { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]]
 //
 svuint8x2_t test_svreadz_ver_za8_u8_x2(uint32_t slice) __arm_streaming __arm_inout("za")
 {
@@ -451,28 +255,14 @@ svuint8x2_t test_svreadz_ver_za8_u8_x2(uint32_t slice) __arm_streaming __arm_ino
 // CHECK-LABEL: define dso_local { <vscale x 8 x i16>, <vscale x 8 x i16> } @test_svreadz_ver_za16_s16_x2(
 // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca { <vscale x 8 x i16>, <vscale x 8 x i16> }, align 16
 // CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sme.readz.vert.x2.nxv8i16(i32 0, i32 [[SLICE]])
-// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], 0
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 16 x i16> @llvm.vector.insert.nxv16i16.nxv8i16(<vscale x 16 x i16> poison, <vscale x 8 x i16> [[TMP1]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], 1
-// CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 16 x i16> @llvm.vector.insert.nxv16i16.nxv8i16(<vscale x 16 x i16> [[TMP2]], <vscale x 8 x i16> [[TMP3]], i64 8)
-// CHECK-NEXT: store <vscale x 16 x i16> [[TMP4]], ptr [[RETVAL]], align 16
-// CHECK-NEXT: [[TMP5:%.*]] = load { <vscale x 8 x i16>, <vscale x 8 x i16> }, ptr [[RETVAL]], align 16
-// CHECK-NEXT: ret { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP5]]
+// CHECK-NEXT: ret { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]]
 //
 // CPP-CHECK-LABEL: define dso_local { <vscale x 8 x i16>, <vscale x 8 x i16> } @_Z28test_svreadz_ver_za16_s16_x2j(
 // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] {
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { <vscale x 8 x i16>, <vscale x 8 x i16> }, align 16
 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sme.readz.vert.x2.nxv8i16(i32 0, i32 [[SLICE]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], 0
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 16 x i16> @llvm.vector.insert.nxv16i16.nxv8i16(<vscale x 16 x i16> poison, <vscale x 8 x i16> [[TMP1]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], 1
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 16 x i16> @llvm.vector.insert.nxv16i16.nxv8i16(<vscale x 16 x i16> [[TMP2]], <vscale x 8 x i16> [[TMP3]], i64 8)
-// CPP-CHECK-NEXT: store <vscale x 16 x i16> [[TMP4]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { <vscale x 8 x i16>, <vscale x 8 x i16> }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: ret { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP5]]
+// CPP-CHECK-NEXT: ret { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]]
 //
 svint16x2_t test_svreadz_ver_za16_s16_x2(uint32_t slice) __arm_streaming __arm_inout("za")
 {
@@ -482,28 +272,14 @@ svint16x2_t test_svreadz_ver_za16_s16_x2(uint32_t slice) __arm_streaming __arm_i
 // CHECK-LABEL: define dso_local { <vscale x 8 x i16>, <vscale x 8 x i16> } @test_svreadz_ver_za16_u16_x2(
 // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca { <vscale x 8 x i16>, <vscale x 8 x i16> }, align 16
 // CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sme.readz.vert.x2.nxv8i16(i32 1, i32 [[SLICE]])
-// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], 0
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 16 x i16> @llvm.vector.insert.nxv16i16.nxv8i16(<vscale x 16 x i16> poison, <vscale x 8 x i16> [[TMP1]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], 1
-// CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 16 x i16> @llvm.vector.insert.nxv16i16.nxv8i16(<vscale x 16 x i16> [[TMP2]], <vscale x 8 x i16> [[TMP3]], i64 8)
-// CHECK-NEXT: store <vscale x 16 x i16> [[TMP4]], ptr [[RETVAL]], align 16
-// CHECK-NEXT: [[TMP5:%.*]] = load { <vscale x 8 x i16>, <vscale x 8 x i16> }, ptr [[RETVAL]], align 16
-// CHECK-NEXT: ret { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP5]]
+// CHECK-NEXT: ret { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]]
 //
 // CPP-CHECK-LABEL: define dso_local { <vscale x 8 x i16>, <vscale x 8 x i16> } @_Z28test_svreadz_ver_za16_u16_x2j(
 // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] {
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { <vscale x 8 x i16>, <vscale x 8 x i16> }, align 16
 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sme.readz.vert.x2.nxv8i16(i32 1, i32 [[SLICE]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], 0
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 16 x i16> @llvm.vector.insert.nxv16i16.nxv8i16(<vscale x 16 x i16> poison, <vscale x 8 x i16> [[TMP1]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], 1
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 16 x i16> @llvm.vector.insert.nxv16i16.nxv8i16(<vscale x 16 x i16> [[TMP2]], <vscale x 8 x i16> [[TMP3]], i64 8)
-// CPP-CHECK-NEXT: store <vscale x 16 x i16> [[TMP4]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { <vscale x 8 x i16>, <vscale x 8 x i16> }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: ret { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP5]]
+// CPP-CHECK-NEXT: ret { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]]
 //
 svuint16x2_t test_svreadz_ver_za16_u16_x2(uint32_t slice) __arm_streaming __arm_inout("za")
 {
@@ -513,28 +289,14 @@ svuint16x2_t test_svreadz_ver_za16_u16_x2(uint32_t slice) __arm_streaming __arm_
 // CHECK-LABEL: define dso_local { <vscale x 8 x half>, <vscale x 8 x half> } @test_svreadz_ver_za16_f16_x2(
 // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca { <vscale x 8 x half>, <vscale x 8 x half> }, align 16
 // CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sme.readz.vert.x2.nxv8f16(i32 0, i32 [[SLICE]])
-// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half> } [[TMP0]], 0
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 16 x half> @llvm.vector.insert.nxv16f16.nxv8f16(<vscale x 16 x half> poison, <vscale x 8 x half> [[TMP1]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half> } [[TMP0]], 1
-// CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 16 x half> @llvm.vector.insert.nxv16f16.nxv8f16(<vscale x 16 x half> [[TMP2]], <vscale x 8 x half> [[TMP3]], i64 8)
-// CHECK-NEXT: store <vscale x 16 x half> [[TMP4]], ptr [[RETVAL]], align 16
-// CHECK-NEXT: [[TMP5:%.*]] = load { <vscale x 8 x half>, <vscale x 8 x half> }, ptr [[RETVAL]], align 16
-// CHECK-NEXT: ret { <vscale x 8 x half>, <vscale x 8 x half> } [[TMP5]]
+// CHECK-NEXT: ret { <vscale x 8 x half>, <vscale x 8 x half> } [[TMP0]]
 //
 // CPP-CHECK-LABEL: define dso_local { <vscale x 8 x half>, <vscale x 8 x half> } @_Z28test_svreadz_ver_za16_f16_x2j(
 // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] {
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { <vscale x 8 x half>, <vscale x 8 x half> }, align 16
 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sme.readz.vert.x2.nxv8f16(i32 0, i32 [[SLICE]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half> } [[TMP0]], 0
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 16 x half> @llvm.vector.insert.nxv16f16.nxv8f16(<vscale x 16 x half> poison, <vscale x 8 x half> [[TMP1]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half> } [[TMP0]], 1
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 16 x half> @llvm.vector.insert.nxv16f16.nxv8f16(<vscale x 16 x half> [[TMP2]], <vscale x 8 x half> [[TMP3]], i64 8)
-// CPP-CHECK-NEXT: store <vscale x 16 x half> [[TMP4]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { <vscale x 8 x half>, <vscale x 8 x half> }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT: ret { <vscale x 8 x half>, <vscale x 8 x half> } [[TMP5]]
+// CPP-CHECK-NEXT: ret { <vscale x 8 x half>, <vscale x 8 x half> } [[TMP0]]
 //
 svfloat16x2_t test_svreadz_ver_za16_f16_x2(uint32_t slice) __arm_streaming __arm_inout("za")
 {
@@ -544,28 +306,14 @@ svfloat16x2_t test_svreadz_ver_za16_f16_x2(uint32_t slice) __arm_streaming __arm
 // CHECK-LABEL: define dso_local { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @test_svreadz_ver_za16_bf16_x2(
 // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> }, align 16
 // CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sme.readz.vert.x2.nxv8bf16(i32 1, i32 [[SLICE]])
-// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP0]], 0
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 16 x bfloat> @llvm.vector.insert.nxv16bf16.nxv8bf16(<vscale x 16 x bfloat> poison, <vscale x 8 x bfloat> [[TMP1]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP0]], 1
-// CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 16 x bfloat> @llvm.vector.insert.nxv16bf16.nxv8bf16(<vscale x 16 x bfloat> [[TMP2]], <vscale x 8 x bfloat> [[TMP3]], i64 8)
-// CHECK-NEXT: store <vscale x 16 x bfloat> [[TMP4]], ptr [[RETVAL]], align 16
-// CHECK-NEXT: [[TMP5:%.*]] = load { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> }, ptr [[RETVAL]], align 16
-// CHECK-NEXT: ret { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP5]]
+// CHECK-NEXT: ret { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP0]]
 //
 // CPP-CHECK-LABEL: define dso_local { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @_Z29test_svreadz_ver_za16_bf16_x2j(
 // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] {
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> }, align 16
 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sme.readz.vert.x2.nxv8bf16(i32 1, i32 [[SLICE]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP0]], 0
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 16 x bfloat> @llvm.vector.insert.nxv16bf16.nxv8bf16(<vscale x 16 x bfloat> poison, <vscale x 8 x bfloat> [[TMP1]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP0]], 1
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svbfloat16x2_t test_svreadz_ver_za16_bf16_x2(uint32_t slice) __arm_streaming __arm_inout("za") { @@ -576,28 +324,14 @@ svbfloat16x2_t test_svreadz_ver_za16_bf16_x2(uint32_t slice) __arm_streaming __a // CHECK-LABEL: define dso_local { , } @test_svreadz_ver_za32_s32_x2( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.vert.x2.nxv4i32(i32 0, i32 [[SLICE]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z28test_svreadz_ver_za32_s32_x2j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.vert.x2.nxv4i32(i32 0, i32 [[SLICE]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint32x2_t test_svreadz_ver_za32_s32_x2(uint32_t slice) __arm_streaming __arm_inout("za") { @@ -607,28 +341,14 @@ svint32x2_t test_svreadz_ver_za32_s32_x2(uint32_t slice) __arm_streaming __arm_i // CHECK-LABEL: define dso_local { , } @test_svreadz_ver_za32_u32_x2( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.vert.x2.nxv4i32(i32 2, i32 [[SLICE]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z28test_svreadz_ver_za32_u32_x2j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: 
[[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.vert.x2.nxv4i32(i32 2, i32 [[SLICE]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint32x2_t test_svreadz_ver_za32_u32_x2(uint32_t slice) __arm_streaming __arm_inout("za") { @@ -638,28 +358,14 @@ svuint32x2_t test_svreadz_ver_za32_u32_x2(uint32_t slice) __arm_streaming __arm_ // CHECK-LABEL: define dso_local { , } @test_svreadz_ver_za32_f32_x2( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.vert.x2.nxv4f32(i32 3, i32 [[SLICE]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z28test_svreadz_ver_za32_f32_x2j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.vert.x2.nxv4f32(i32 3, i32 [[SLICE]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat32x2_t test_svreadz_ver_za32_f32_x2(uint32_t slice) __arm_streaming __arm_inout("za") { @@ -669,28 +375,14 @@ svfloat32x2_t test_svreadz_ver_za32_f32_x2(uint32_t slice) __arm_streaming __arm // CHECK-LABEL: define dso_local { , } @test_svreadz_ver_za64_s64_x2( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.vert.x2.nxv2i64(i32 0, i32 [[SLICE]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 
16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z28test_svreadz_ver_za64_s64_x2j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.vert.x2.nxv2i64(i32 0, i32 [[SLICE]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint64x2_t test_svreadz_ver_za64_s64_x2(uint32_t slice) __arm_streaming __arm_inout("za") { @@ -700,28 +392,14 @@ svint64x2_t test_svreadz_ver_za64_s64_x2(uint32_t slice) __arm_streaming __arm_i // CHECK-LABEL: define dso_local { , } @test_svreadz_ver_za64_u64_x2( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.vert.x2.nxv2i64(i32 4, i32 [[SLICE]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z28test_svreadz_ver_za64_u64_x2j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.vert.x2.nxv2i64(i32 4, i32 [[SLICE]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint64x2_t test_svreadz_ver_za64_u64_x2(uint32_t slice) __arm_streaming __arm_inout("za") { @@ -731,28 +409,14 @@ svuint64x2_t test_svreadz_ver_za64_u64_x2(uint32_t slice) __arm_streaming __arm_ // CHECK-LABEL: define dso_local { , } @test_svreadz_ver_za64_f64_x2( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.vert.x2.nxv2f64(i32 7, i32 [[SLICE]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call 
@llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z28test_svreadz_ver_za64_f64_x2j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.vert.x2.nxv2f64(i32 7, i32 [[SLICE]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat64x2_t test_svreadz_ver_za64_f64_x2(uint32_t slice) __arm_streaming __arm_inout("za") { @@ -765,36 +429,14 @@ svfloat64x2_t test_svreadz_ver_za64_f64_x2(uint32_t slice) __arm_streaming __arm // CHECK-LABEL: define dso_local { , , , } @test_svreadz_hor_za8_s8_x4( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.horiz.x4.nxv16i8(i32 0, i32 [[SLICE]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , , , } @_Z26test_svreadz_hor_za8_s8_x4j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.horiz.x4.nxv16i8(i32 0, i32 [[SLICE]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail 
call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint8x4_t test_svreadz_hor_za8_s8_x4(uint32_t slice) __arm_streaming __arm_inout("za") { @@ -804,36 +446,14 @@ svint8x4_t test_svreadz_hor_za8_s8_x4(uint32_t slice) __arm_streaming __arm_inou // CHECK-LABEL: define dso_local { , , , } @test_svreadz_hor_za8_u8_x4( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.horiz.x4.nxv16i8(i32 0, i32 [[SLICE]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , , , } @_Z26test_svreadz_hor_za8_u8_x4j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.horiz.x4.nxv16i8(i32 0, i32 [[SLICE]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint8x4_t test_svreadz_hor_za8_u8_x4(uint32_t slice) __arm_streaming __arm_inout("za") { @@ -843,36 +463,14 @@ svuint8x4_t test_svreadz_hor_za8_u8_x4(uint32_t slice) __arm_streaming __arm_ino // CHECK-LABEL: define dso_local { , , , } @test_svreadz_hor_za16_s16_x4( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = 
alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.horiz.x4.nxv8i16(i32 0, i32 [[SLICE]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , , , } @_Z28test_svreadz_hor_za16_s16_x4j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.horiz.x4.nxv8i16(i32 0, i32 [[SLICE]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint16x4_t test_svreadz_hor_za16_s16_x4(uint32_t slice) __arm_streaming __arm_inout("za") { @@ -882,36 +480,14 @@ svint16x4_t test_svreadz_hor_za16_s16_x4(uint32_t slice) __arm_streaming __arm_i // CHECK-LABEL: define dso_local { , , , } @test_svreadz_hor_za16_u16_x4( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.horiz.x4.nxv8i16(i32 1, i32 [[SLICE]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// 
CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , , , } @_Z28test_svreadz_hor_za16_u16_x4j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.horiz.x4.nxv8i16(i32 1, i32 [[SLICE]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint16x4_t test_svreadz_hor_za16_u16_x4(uint32_t slice) __arm_streaming __arm_inout("za") { @@ -921,36 +497,14 @@ svuint16x4_t test_svreadz_hor_za16_u16_x4(uint32_t slice) __arm_streaming __arm_ // CHECK-LABEL: define dso_local { , , , } @test_svreadz_hor_za16_f16_x4( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.horiz.x4.nxv8f16(i32 0, i32 [[SLICE]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , , , } @_Z28test_svreadz_hor_za16_f16_x4j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.horiz.x4.nxv8f16(i32 0, i32 [[SLICE]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = 
tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat16x4_t test_svreadz_hor_za16_f16_x4(uint32_t slice) __arm_streaming __arm_inout("za") { @@ -960,36 +514,14 @@ svfloat16x4_t test_svreadz_hor_za16_f16_x4(uint32_t slice) __arm_streaming __arm // CHECK-LABEL: define dso_local { , , , } @test_svreadz_hor_za16_bf16_x4( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.horiz.x4.nxv8bf16(i32 1, i32 [[SLICE]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , , , } @_Z29test_svreadz_hor_za16_bf16_x4j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.horiz.x4.nxv8bf16(i32 1, i32 [[SLICE]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svbfloat16x4_t test_svreadz_hor_za16_bf16_x4(uint32_t slice) __arm_streaming __arm_inout("za") { @@ -1000,36 +532,14 @@ svbfloat16x4_t 
test_svreadz_hor_za16_bf16_x4(uint32_t slice) __arm_streaming __a // CHECK-LABEL: define dso_local { , , , } @test_svreadz_hor_za32_s32_x4( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.horiz.x4.nxv4i32(i32 0, i32 [[SLICE]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , , , } @_Z28test_svreadz_hor_za32_s32_x4j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.horiz.x4.nxv4i32(i32 0, i32 [[SLICE]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint32x4_t test_svreadz_hor_za32_s32_x4(uint32_t slice) __arm_streaming __arm_inout("za") { @@ -1039,36 +549,14 @@ svint32x4_t test_svreadz_hor_za32_s32_x4(uint32_t slice) __arm_streaming __arm_i // CHECK-LABEL: define dso_local { , , , } @test_svreadz_hor_za32_u32_x4( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.horiz.x4.nxv4i32(i32 2, i32 [[SLICE]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call 
@llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , , , } @_Z28test_svreadz_hor_za32_u32_x4j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.horiz.x4.nxv4i32(i32 2, i32 [[SLICE]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint32x4_t test_svreadz_hor_za32_u32_x4(uint32_t slice) __arm_streaming __arm_inout("za") { @@ -1078,36 +566,14 @@ svuint32x4_t test_svreadz_hor_za32_u32_x4(uint32_t slice) __arm_streaming __arm_ // CHECK-LABEL: define dso_local { , , , } @test_svreadz_hor_za32_f32_x4( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.horiz.x4.nxv4f32(i32 3, i32 [[SLICE]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , , , } @_Z28test_svreadz_hor_za32_f32_x4j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.horiz.x4.nxv4f32(i32 3, i32 [[SLICE]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = 
extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat32x4_t test_svreadz_hor_za32_f32_x4(uint32_t slice) __arm_streaming __arm_inout("za") { @@ -1117,36 +583,14 @@ svfloat32x4_t test_svreadz_hor_za32_f32_x4(uint32_t slice) __arm_streaming __arm // CHECK-LABEL: define dso_local { , , , } @test_svreadz_hor_za64_s64_x4( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.horiz.x4.nxv2i64(i32 0, i32 [[SLICE]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , , , } @_Z28test_svreadz_hor_za64_s64_x4j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.horiz.x4.nxv2i64(i32 0, i32 [[SLICE]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , 
, } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint64x4_t test_svreadz_hor_za64_s64_x4(uint32_t slice) __arm_streaming __arm_inout("za") { @@ -1156,36 +600,14 @@ svint64x4_t test_svreadz_hor_za64_s64_x4(uint32_t slice) __arm_streaming __arm_i // CHECK-LABEL: define dso_local { , , , } @test_svreadz_hor_za64_u64_x4( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.horiz.x4.nxv2i64(i32 4, i32 [[SLICE]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , , , } @_Z28test_svreadz_hor_za64_u64_x4j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.horiz.x4.nxv2i64(i32 4, i32 [[SLICE]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint64x4_t test_svreadz_hor_za64_u64_x4(uint32_t slice) __arm_streaming __arm_inout("za") { @@ -1195,36 +617,14 @@ svuint64x4_t test_svreadz_hor_za64_u64_x4(uint32_t slice) __arm_streaming __arm_ // CHECK-LABEL: define dso_local { , , , } @test_svreadz_hor_za64_f64_x4( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.horiz.x4.nxv2f64(i32 7, i32 [[SLICE]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail 
[This range is a long run of auto-generated FileCheck hunks for the SME read-and-zero (svreadz) multi-vector tests, all regenerated with the same change; one representative hunk follows, with its scalable-vector struct types restored from the intrinsic suffixes, and the remaining hunks in the range are listed after it.]

@@ -2120,28 +1256,14 @@ svfloat64_t test_svreadz_hor_za128_f64(uint32_t slice) __arm_streaming __arm_ino
 // CHECK-LABEL: define dso_local { <vscale x 16 x i8>, <vscale x 16 x i8> } @test_svreadz_za8_s8_x2(
 // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 16 x i8>, <vscale x 16 x i8> }, align 16
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sme.readz.x2.nxv16i8(i32 [[SLICE]])
-// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 0
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> poison, <vscale x 16 x i8> [[TMP1]], i64 0)
-// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 1
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]], i64 16)
-// CHECK-NEXT:    store <vscale x 32 x i8> [[TMP4]], ptr [[RETVAL]], align 16
-// CHECK-NEXT:    [[TMP5:%.*]] = load { <vscale x 16 x i8>, <vscale x 16 x i8> }, ptr [[RETVAL]], align 16
-// CHECK-NEXT:    ret { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP5]]
+// CHECK-NEXT:    ret { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]]
 //
 // CPP-CHECK-LABEL: define dso_local { <vscale x 16 x i8>, <vscale x 16 x i8> } @_Z22test_svreadz_za8_s8_x2j(
 // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] {
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 16 x i8>, <vscale x 16 x i8> }, align 16
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sme.readz.x2.nxv16i8(i32 [[SLICE]])
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 0
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> poison, <vscale x 16 x i8> [[TMP1]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 1
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]], i64 16)
-// CPP-CHECK-NEXT:    store <vscale x 32 x i8> [[TMP4]], ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT:    [[TMP5:%.*]] = load { <vscale x 16 x i8>, <vscale x 16 x i8> }, ptr [[RETVAL]], align 16
-// CPP-CHECK-NEXT:    ret { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP5]]
+// CPP-CHECK-NEXT:    ret { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]]
 //
 svint8x2_t test_svreadz_za8_s8_x2(uint32_t slice) __arm_streaming __arm_inout("za")
 {

[The remaining hunks in this range apply the identical rewrite — dropping the RETVAL alloca, the per-element extractvalue/@llvm.vector.insert repacking, and the store/load round trip, so the checks expect the aggregate result of the @llvm.aarch64.sme.readz.* intrinsic to be returned directly as `ret { ... } [[TMP0]]` — in both the CHECK and CPP-CHECK output of each of: the tail of test_svreadz_hor_za64_f64_x4; test_svreadz_ver_za8_{s8,u8}_x4, test_svreadz_ver_za16_{s16,u16,f16,bf16}_x4, test_svreadz_ver_za32_{s32,u32,f32}_x4, and test_svreadz_ver_za64_{s64,u64,f64}_x4; test_svreadz_za8_{s8,u8}_x2, test_svreadz_za16_{s16,u16,bf16,f16}_x2, test_svreadz_za32_{s32,u32,f32}_x2, and test_svreadz_za64_{s64,u64,f64}_x2; and test_svreadz_za8_{s8,u8}_x4 and test_svreadz_za16_{s16,u16}_x4, with the x4 variants removing the four-way extractvalue/insert chain ([[TMP1]]..[[TMP8]]) and returning [[TMP0]] in place of [[TMP9]].]
= tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint16x4_t test_svreadz_za16_u16_x4(uint32_t slice) __arm_streaming __arm_inout("za") { @@ -2652,36 +1532,14 @@ svuint16x4_t test_svreadz_za16_u16_x4(uint32_t slice) __arm_streaming __arm_inou // CHECK-LABEL: define dso_local { , , , } @test_svreadz_za32_s32_x4( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.x4.nxv4i32(i32 [[SLICE]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , , , } @_Z24test_svreadz_za32_s32_x4j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.x4.nxv4i32(i32 [[SLICE]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint32x4_t test_svreadz_za32_s32_x4(uint32_t slice) __arm_streaming __arm_inout("za") { @@ -2691,36 +1549,14 @@ svint32x4_t test_svreadz_za32_s32_x4(uint32_t slice) __arm_streaming __arm_inout // CHECK-LABEL: define dso_local { , , , } @test_svreadz_za32_u32_x4( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, 
align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.x4.nxv4i32(i32 [[SLICE]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , , , } @_Z24test_svreadz_za32_u32_x4j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.x4.nxv4i32(i32 [[SLICE]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint32x4_t test_svreadz_za32_u32_x4(uint32_t slice) __arm_streaming __arm_inout("za") { @@ -2730,36 +1566,14 @@ svuint32x4_t test_svreadz_za32_u32_x4(uint32_t slice) __arm_streaming __arm_inou // CHECK-LABEL: define dso_local { , , , } @test_svreadz_za64_s64_x4( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.x4.nxv2i64(i32 [[SLICE]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = 
load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , , , } @_Z24test_svreadz_za64_s64_x4j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.x4.nxv2i64(i32 [[SLICE]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint64x4_t test_svreadz_za64_s64_x4(uint32_t slice) __arm_streaming __arm_inout("za") { @@ -2769,36 +1583,14 @@ svint64x4_t test_svreadz_za64_s64_x4(uint32_t slice) __arm_streaming __arm_inout // CHECK-LABEL: define dso_local { , , , } @test_svreadz_za64_u64_x4( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.x4.nxv2i64(i32 [[SLICE]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , , , } @_Z24test_svreadz_za64_u64_x4j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.x4.nxv2i64(i32 [[SLICE]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// 
CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint64x4_t test_svreadz_za64_u64_x4(uint32_t slice) __arm_streaming __arm_inout("za") { @@ -2808,36 +1600,14 @@ svuint64x4_t test_svreadz_za64_u64_x4(uint32_t slice) __arm_streaming __arm_inou // CHECK-LABEL: define dso_local { , , , } @test_svreadz_za16_bf16_x4( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.x4.nxv8bf16(i32 [[SLICE]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , , , } @_Z25test_svreadz_za16_bf16_x4j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.x4.nxv8bf16(i32 [[SLICE]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svbfloat16x4_t test_svreadz_za16_bf16_x4(uint32_t slice) __arm_streaming __arm_inout("za") { @@ -2847,36 +1617,14 @@ svbfloat16x4_t test_svreadz_za16_bf16_x4(uint32_t slice) __arm_streaming __arm_i // CHECK-LABEL: define dso_local { , , , } @test_svreadz_za16_f16_x4( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: 
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.x4.nxv8f16(i32 [[SLICE]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , , , } @_Z24test_svreadz_za16_f16_x4j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.x4.nxv8f16(i32 [[SLICE]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat16x4_t test_svreadz_za16_f16_x4(uint32_t slice) __arm_streaming __arm_inout("za") { @@ -2886,36 +1634,14 @@ svfloat16x4_t test_svreadz_za16_f16_x4(uint32_t slice) __arm_streaming __arm_ino // CHECK-LABEL: define dso_local { , , , } @test_svreadz_za32_f32_x4( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.x4.nxv4f32(i32 [[SLICE]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: store 
[[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , , , } @_Z24test_svreadz_za32_f32_x4j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.x4.nxv4f32(i32 [[SLICE]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat32x4_t test_svreadz_za32_f32_x4(uint32_t slice) __arm_streaming __arm_inout("za") { @@ -2925,36 +1651,14 @@ svfloat32x4_t test_svreadz_za32_f32_x4(uint32_t slice) __arm_streaming __arm_ino // CHECK-LABEL: define dso_local { , , , } @test_svreadz_za64_f64_x4( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.x4.nxv2f64(i32 [[SLICE]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret { , , , } [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , , , } @_Z24test_svreadz_za64_f64_x4j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.x4.nxv2f64(i32 [[SLICE]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// 
diff --git a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_pext.c b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_pext.c
index 6cea34ee52ef6..deb126236ad57 100644
--- a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_pext.c
+++ b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_pext.c

[Hunks for test_svpext_lane_c8_x2_{0,1} (line-number headers elided): the RETVAL alloca and the @llvm.vector.insert/store/load sequence are deleted and the { <vscale x 16 x i1>, <vscale x 16 x i1> } result of @llvm.aarch64.sve.pext.x2.nxv16i1 is returned directly. Hunks for test_svpext_lane_{c16,c32,c64}_x2_{0,1} (nxv8i1/nxv4i1/nxv2i1 intrinsics): the two @llvm.aarch64.sve.convert.to.svbool.* results are now assembled into the returned pair with insertvalue instead of being packed into a <vscale x 32 x i1> and round-tripped through RETVAL, i.e. in each CHECK and CPP-CHECK block:]

-// CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> poison, <vscale x 16 x i1> [[TMP2]], i64 0)
+// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { <vscale x 16 x i1>, <vscale x 16 x i1> } poison, <vscale x 16 x i1> [[TMP2]], 0

[...and likewise for element 1, with the store/load of RETVAL deleted and ret taking the insertvalue result directly.]

diff --git a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_while_x2.c b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_while_x2.c
index 3fcc1dc6c819a..612f2d25d40d0 100644
--- a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_while_x2.c
+++ b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_while_x2.c
[Hunks for test_svwhilege_b8_{s64,u64} (line-number headers elided): the { <vscale x 16 x i1>, <vscale x 16 x i1> } result of @llvm.aarch64.sve.whilege.x2.nxv16i1 / @llvm.aarch64.sve.whilehs.x2.nxv16i1 is returned directly, the RETVAL alloca and the @llvm.vector.insert/store/load sequence being deleted. Hunks for test_svwhilege_{b16,b32}_{s64,u64} (nxv8i1/nxv4i1 intrinsics): the two @llvm.aarch64.sve.convert.to.svbool.* results are assembled with insertvalue instead of @llvm.vector.insert plus a RETVAL round-trip, exactly as in acle_sve2p1_pext.c above. The final two hunks, for test_svwhilege_b64_{s64,u64}, read:]

@@ -219,32 +167,26 @@
 // CHECK-LABEL: define dso_local { <vscale x 16 x i1>, <vscale x 16 x i1> } @test_svwhilege_b64_s64(
 // CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca { <vscale x 16 x i1>, <vscale x 16 x i1> }, align 2
 // CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 2 x i1>, <vscale x 2 x i1> } @llvm.aarch64.sve.whilege.x2.nxv2i1(i64 [[OP1]], i64 [[OP2]])
 // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 2 x i1>, <vscale x 2 x i1> } [[TMP0]], 0
 // CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[TMP1]])
-// CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> poison, <vscale x 16 x i1> [[TMP2]], i64 0)
+// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { <vscale x 16 x i1>, <vscale x 16 x i1> } poison, <vscale x 16 x i1> [[TMP2]], 0
 // CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 2 x i1>, <vscale x 2 x i1> } [[TMP0]], 1
 // CHECK-NEXT: [[TMP5:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[TMP4]])
-// CHECK-NEXT: [[TMP6:%.*]] = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> [[TMP3]], <vscale x 16 x i1> [[TMP5]], i64 16)
-// CHECK-NEXT: store <vscale x 32 x i1> [[TMP6]], ptr [[RETVAL]], align 2
-// CHECK-NEXT: [[TMP7:%.*]] = load { <vscale x 16 x i1>, <vscale x 16 x i1> }, ptr [[RETVAL]], align 2
-// CHECK-NEXT: ret { <vscale x 16 x i1>, <vscale x 16 x i1> } [[TMP7]]
+// CHECK-NEXT: [[TMP6:%.*]] = insertvalue { <vscale x 16 x i1>, <vscale x 16 x i1> } [[TMP3]], <vscale x 16 x i1> [[TMP5]], 1
+// CHECK-NEXT: ret { <vscale x 16 x i1>, <vscale x 16 x i1> } [[TMP6]]
 //
 // CPP-CHECK-LABEL: define dso_local { <vscale x 16 x i1>, <vscale x 16 x i1> } @_Z22test_svwhilege_b64_s64ll(
 // CPP-CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] {
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { <vscale x 16 x i1>, <vscale x 16 x i1> }, align 2
 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 2 x i1>, <vscale x 2 x i1> } @llvm.aarch64.sve.whilege.x2.nxv2i1(i64 [[OP1]], i64 [[OP2]])
 // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 2 x i1>, <vscale x 2 x i1> } [[TMP0]], 0
 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[TMP1]])
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> poison, <vscale x 16 x i1> [[TMP2]], i64 0)
+// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { <vscale x 16 x i1>, <vscale x 16 x i1> } poison, <vscale x 16 x i1> [[TMP2]], 0
 // CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 2 x i1>, <vscale x 2 x i1> } [[TMP0]], 1
 // CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[TMP4]])
-// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> [[TMP3]], <vscale x 16 x i1> [[TMP5]], i64 16)
-// CPP-CHECK-NEXT: store <vscale x 32 x i1> [[TMP6]], ptr [[RETVAL]], align 2
-// CPP-CHECK-NEXT: [[TMP7:%.*]] = load { <vscale x 16 x i1>, <vscale x 16 x i1> }, ptr [[RETVAL]], align 2
-// CPP-CHECK-NEXT: ret { <vscale x 16 x i1>, <vscale x 16 x i1> } [[TMP7]]
+// CPP-CHECK-NEXT: [[TMP6:%.*]] = insertvalue { <vscale x 16 x i1>, <vscale x 16 x i1> } [[TMP3]], <vscale x 16 x i1> [[TMP5]], 1
+// CPP-CHECK-NEXT: ret { <vscale x 16 x i1>, <vscale x 16 x i1> } [[TMP6]]
 //
 svboolx2_t test_svwhilege_b64_s64(int64_t op1, int64_t op2) ATTR {
   return SVE_ACLE_FUNC(svwhilege_b64,_s64,_x2)(op1, op2);
@@ -253,32 +195,26 @@ svboolx2_t test_svwhilege_b64_s64(int64_t op1, int64_t op2) ATTR {
 // CHECK-LABEL: define dso_local { <vscale x 16 x i1>, <vscale x 16 x i1> } @test_svwhilege_b64_u64(
 // CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL:%.*]] = alloca { <vscale x 16 x i1>, <vscale x 16 x i1> }, align 2
 // CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 2 x i1>, <vscale x 2 x i1> } @llvm.aarch64.sve.whilehs.x2.nxv2i1(i64 [[OP1]], i64 [[OP2]])
 // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 2 x i1>, <vscale x 2 x i1> } [[TMP0]], 0
 // CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[TMP1]])
-// CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> poison, <vscale x 16 x i1> [[TMP2]], i64 0)
+// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { <vscale x 16 x i1>, <vscale x 16 x i1> } poison, <vscale x 16 x i1> [[TMP2]], 0
 // CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 2 x i1>, <vscale x 2 x i1> } [[TMP0]], 1
 // CHECK-NEXT: [[TMP5:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[TMP4]])
-// CHECK-NEXT: [[TMP6:%.*]] = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> [[TMP3]], <vscale x 16 x i1> [[TMP5]], i64 16)
-// CHECK-NEXT: store <vscale x 32 x i1> [[TMP6]], ptr [[RETVAL]], align 2
-// CHECK-NEXT: [[TMP7:%.*]] = load { <vscale x 16 x i1>, <vscale x 16 x i1> }, ptr [[RETVAL]], align 2
-// CHECK-NEXT: ret { <vscale x 16 x i1>, <vscale x 16 x i1> } [[TMP7]]
+// CHECK-NEXT: [[TMP6:%.*]] = insertvalue { <vscale x 16 x i1>, <vscale x 16 x i1> } [[TMP3]], <vscale x 16 x i1> [[TMP5]], 1
+// CHECK-NEXT: ret { <vscale x 16 x i1>, <vscale x 16 x i1> } [[TMP6]]
 //
 // CPP-CHECK-LABEL: define dso_local { <vscale x 16 x i1>, <vscale x 16 x i1> } @_Z22test_svwhilege_b64_u64mm(
 // CPP-CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] {
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { <vscale x 16 x i1>, <vscale x 16 x i1> }, align 2
 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 2 x i1>, <vscale x 2 x i1> } @llvm.aarch64.sve.whilehs.x2.nxv2i1(i64 [[OP1]], i64 [[OP2]])
 // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 2 x i1>, <vscale x 2 x i1> } [[TMP0]], 0
 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[TMP1]])
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> poison, <vscale x 16 x i1> [[TMP2]], i64 0)
+// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { <vscale x 16 x i1>, <vscale x 16 x i1> } poison, <vscale x 16 x i1> [[TMP2]], 0
 // CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 2 x i1>, <vscale x 2 x i1> } [[TMP0]], 1
 // CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[TMP4]])
-// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> [[TMP3]], <vscale x 16 x i1> [[TMP5]], i64 16)
-// CPP-CHECK-NEXT: store <vscale x 32 x i1> [[TMP6]], ptr [[RETVAL]], align 
2 -// CPP-CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: ret { , } [[TMP7]] +// CPP-CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } [[TMP3]], [[TMP5]], 1 +// CPP-CHECK-NEXT: ret { , } [[TMP6]] // svboolx2_t test_svwhilege_b64_u64(uint64_t op1, uint64_t op2) ATTR { return SVE_ACLE_FUNC(svwhilege_b64,_u64,_x2)(op1, op2); @@ -287,28 +223,14 @@ svboolx2_t test_svwhilege_b64_u64(uint64_t op1, uint64_t op2) ATTR { // CHECK-LABEL: define dso_local { , } @test_svwhilegt_b8_s64( // CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilegt.x2.nxv16i1(i64 [[OP1]], i64 [[OP2]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 2 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z21test_svwhilegt_b8_s64ll( // CPP-CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilegt.x2.nxv16i1(i64 [[OP1]], i64 [[OP2]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svboolx2_t test_svwhilegt_b8_s64(int64_t op1, int64_t op2) ATTR { return SVE_ACLE_FUNC(svwhilegt_b8,_s64,_x2)(op1, op2); @@ -317,28 +239,14 @@ svboolx2_t test_svwhilegt_b8_s64(int64_t op1, int64_t op2) ATTR { // CHECK-LABEL: define dso_local { , } @test_svwhilegt_b8_u64( // CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilehi.x2.nxv16i1(i64 [[OP1]], i64 [[OP2]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 2 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z21test_svwhilegt_b8_u64mm( // CPP-CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CPP-CHECK-NEXT: [[TMP0:%.*]] = 
tail call { , } @llvm.aarch64.sve.whilehi.x2.nxv16i1(i64 [[OP1]], i64 [[OP2]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svboolx2_t test_svwhilegt_b8_u64(uint64_t op1, uint64_t op2) ATTR { return SVE_ACLE_FUNC(svwhilegt_b8,_u64,_x2)(op1, op2); @@ -347,32 +255,26 @@ svboolx2_t test_svwhilegt_b8_u64(uint64_t op1, uint64_t op2) ATTR { // CHECK-LABEL: define dso_local { , } @test_svwhilegt_b16_s64( // CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilegt.x2.nxv8i1(i64 [[OP1]], i64 [[OP2]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 // CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP4]]) -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 2 -// CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CHECK-NEXT: ret { , } [[TMP7]] +// CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } [[TMP3]], [[TMP5]], 1 +// CHECK-NEXT: ret { , } [[TMP6]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z22test_svwhilegt_b16_s64ll( // CPP-CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilegt.x2.nxv8i1(i64 [[OP1]], i64 [[OP2]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP1]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 // CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP4]]) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: ret { , } [[TMP7]] +// CPP-CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } [[TMP3]], [[TMP5]], 1 +// CPP-CHECK-NEXT: ret { , } [[TMP6]] // svboolx2_t test_svwhilegt_b16_s64(int64_t op1, int64_t op2) ATTR { return SVE_ACLE_FUNC(svwhilegt_b16,_s64,_x2)(op1, op2); @@ -381,32 +283,26 @@ svboolx2_t test_svwhilegt_b16_s64(int64_t op1, int64_t op2) ATTR { // CHECK-LABEL: define dso_local { , } 
@test_svwhilegt_b16_u64( // CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilehi.x2.nxv8i1(i64 [[OP1]], i64 [[OP2]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 // CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP4]]) -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 2 -// CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CHECK-NEXT: ret { , } [[TMP7]] +// CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } [[TMP3]], [[TMP5]], 1 +// CHECK-NEXT: ret { , } [[TMP6]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z22test_svwhilegt_b16_u64mm( // CPP-CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilehi.x2.nxv8i1(i64 [[OP1]], i64 [[OP2]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP1]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 // CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP4]]) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: ret { , } [[TMP7]] +// CPP-CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } [[TMP3]], [[TMP5]], 1 +// CPP-CHECK-NEXT: ret { , } [[TMP6]] // svboolx2_t test_svwhilegt_b16_u64(uint64_t op1, uint64_t op2) ATTR { return SVE_ACLE_FUNC(svwhilegt_b16,_u64,_x2)(op1, op2); @@ -415,32 +311,26 @@ svboolx2_t test_svwhilegt_b16_u64(uint64_t op1, uint64_t op2) ATTR { // CHECK-LABEL: define dso_local { , } @test_svwhilegt_b32_s64( // CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilegt.x2.nxv4i1(i64 [[OP1]], i64 [[OP2]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 // CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( [[TMP4]]) -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CHECK-NEXT: store 
[[TMP6]], ptr [[RETVAL]], align 2 -// CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CHECK-NEXT: ret { , } [[TMP7]] +// CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } [[TMP3]], [[TMP5]], 1 +// CHECK-NEXT: ret { , } [[TMP6]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z22test_svwhilegt_b32_s64ll( // CPP-CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilegt.x2.nxv4i1(i64 [[OP1]], i64 [[OP2]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( [[TMP1]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 // CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( [[TMP4]]) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: ret { , } [[TMP7]] +// CPP-CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } [[TMP3]], [[TMP5]], 1 +// CPP-CHECK-NEXT: ret { , } [[TMP6]] // svboolx2_t test_svwhilegt_b32_s64(int64_t op1, int64_t op2) ATTR { return SVE_ACLE_FUNC(svwhilegt_b32,_s64,_x2)(op1, op2); @@ -449,32 +339,26 @@ svboolx2_t test_svwhilegt_b32_s64(int64_t op1, int64_t op2) ATTR { // CHECK-LABEL: define dso_local { , } @test_svwhilegt_b32_u64( // CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilehi.x2.nxv4i1(i64 [[OP1]], i64 [[OP2]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 // CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( [[TMP4]]) -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 2 -// CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CHECK-NEXT: ret { , } [[TMP7]] +// CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } [[TMP3]], [[TMP5]], 1 +// CHECK-NEXT: ret { , } [[TMP6]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z22test_svwhilegt_b32_u64mm( // CPP-CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilehi.x2.nxv4i1(i64 [[OP1]], i64 [[OP2]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( [[TMP1]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = 
insertvalue { , } poison, [[TMP2]], 0 // CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( [[TMP4]]) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: ret { , } [[TMP7]] +// CPP-CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } [[TMP3]], [[TMP5]], 1 +// CPP-CHECK-NEXT: ret { , } [[TMP6]] // svboolx2_t test_svwhilegt_b32_u64(uint64_t op1, uint64_t op2) ATTR { return SVE_ACLE_FUNC(svwhilegt_b32,_u64,_x2)(op1, op2); @@ -483,32 +367,26 @@ svboolx2_t test_svwhilegt_b32_u64(uint64_t op1, uint64_t op2) ATTR { // CHECK-LABEL: define dso_local { , } @test_svwhilegt_b64_s64( // CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilegt.x2.nxv2i1(i64 [[OP1]], i64 [[OP2]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 // CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( [[TMP4]]) -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 2 -// CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CHECK-NEXT: ret { , } [[TMP7]] +// CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } [[TMP3]], [[TMP5]], 1 +// CHECK-NEXT: ret { , } [[TMP6]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z22test_svwhilegt_b64_s64ll( // CPP-CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilegt.x2.nxv2i1(i64 [[OP1]], i64 [[OP2]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( [[TMP1]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 // CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( [[TMP4]]) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: ret { , } [[TMP7]] +// CPP-CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } [[TMP3]], [[TMP5]], 1 +// CPP-CHECK-NEXT: ret { , } [[TMP6]] // svboolx2_t test_svwhilegt_b64_s64(int64_t op1, int64_t op2) ATTR { return SVE_ACLE_FUNC(svwhilegt_b64,_s64,_x2)(op1, op2); @@ -517,32 +395,26 @@ svboolx2_t test_svwhilegt_b64_s64(int64_t op1, int64_t op2) ATTR { // CHECK-LABEL: define dso_local { , } @test_svwhilegt_b64_u64( // CHECK-SAME: i64 noundef 
[[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilehi.x2.nxv2i1(i64 [[OP1]], i64 [[OP2]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 // CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( [[TMP4]]) -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 2 -// CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CHECK-NEXT: ret { , } [[TMP7]] +// CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } [[TMP3]], [[TMP5]], 1 +// CHECK-NEXT: ret { , } [[TMP6]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z22test_svwhilegt_b64_u64mm( // CPP-CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilehi.x2.nxv2i1(i64 [[OP1]], i64 [[OP2]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( [[TMP1]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 // CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( [[TMP4]]) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: ret { , } [[TMP7]] +// CPP-CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } [[TMP3]], [[TMP5]], 1 +// CPP-CHECK-NEXT: ret { , } [[TMP6]] // svboolx2_t test_svwhilegt_b64_u64(uint64_t op1, uint64_t op2) ATTR { return SVE_ACLE_FUNC(svwhilegt_b64,_u64,_x2)(op1, op2); @@ -551,28 +423,14 @@ svboolx2_t test_svwhilegt_b64_u64(uint64_t op1, uint64_t op2) ATTR { // CHECK-LABEL: define dso_local { , } @test_svwhilele_b8_s64( // CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilele.x2.nxv16i1(i64 [[OP1]], i64 [[OP2]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 2 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z21test_svwhilele_b8_s64ll( // CPP-CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef 
[[OP2:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilele.x2.nxv16i1(i64 [[OP1]], i64 [[OP2]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svboolx2_t test_svwhilele_b8_s64(int64_t op1, int64_t op2) ATTR { return SVE_ACLE_FUNC(svwhilele_b8,_s64,_x2)(op1, op2); @@ -581,28 +439,14 @@ svboolx2_t test_svwhilele_b8_s64(int64_t op1, int64_t op2) ATTR { // CHECK-LABEL: define dso_local { , } @test_svwhilele_b8_u64( // CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilels.x2.nxv16i1(i64 [[OP1]], i64 [[OP2]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 2 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z21test_svwhilele_b8_u64mm( // CPP-CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilels.x2.nxv16i1(i64 [[OP1]], i64 [[OP2]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svboolx2_t test_svwhilele_b8_u64(uint64_t op1, uint64_t op2) ATTR { return SVE_ACLE_FUNC(svwhilele_b8,_u64,_x2)(op1, op2); @@ -611,32 +455,26 @@ svboolx2_t test_svwhilele_b8_u64(uint64_t op1, uint64_t op2) ATTR { // CHECK-LABEL: define dso_local { , } @test_svwhilele_b16_s64( // CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilele.x2.nxv8i1(i64 [[OP1]], i64 [[OP2]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], 
i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 // CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP4]]) -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 2 -// CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CHECK-NEXT: ret { , } [[TMP7]] +// CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } [[TMP3]], [[TMP5]], 1 +// CHECK-NEXT: ret { , } [[TMP6]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z22test_svwhilele_b16_s64ll( // CPP-CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilele.x2.nxv8i1(i64 [[OP1]], i64 [[OP2]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP1]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 // CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP4]]) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: ret { , } [[TMP7]] +// CPP-CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } [[TMP3]], [[TMP5]], 1 +// CPP-CHECK-NEXT: ret { , } [[TMP6]] // svboolx2_t test_svwhilele_b16_s64(int64_t op1, int64_t op2) ATTR { return SVE_ACLE_FUNC(svwhilele_b16,_s64,_x2)(op1, op2); @@ -645,32 +483,26 @@ svboolx2_t test_svwhilele_b16_s64(int64_t op1, int64_t op2) ATTR { // CHECK-LABEL: define dso_local { , } @test_svwhilele_b16_u64( // CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilels.x2.nxv8i1(i64 [[OP1]], i64 [[OP2]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 // CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP4]]) -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 2 -// CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CHECK-NEXT: ret { , } [[TMP7]] +// CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } [[TMP3]], [[TMP5]], 1 +// CHECK-NEXT: ret { , } [[TMP6]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z22test_svwhilele_b16_u64mm( // CPP-CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } 
@llvm.aarch64.sve.whilels.x2.nxv8i1(i64 [[OP1]], i64 [[OP2]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP1]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 // CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP4]]) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: ret { , } [[TMP7]] +// CPP-CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } [[TMP3]], [[TMP5]], 1 +// CPP-CHECK-NEXT: ret { , } [[TMP6]] // svboolx2_t test_svwhilele_b16_u64(uint64_t op1, uint64_t op2) ATTR { return SVE_ACLE_FUNC(svwhilele_b16,_u64,_x2)(op1, op2); @@ -679,32 +511,26 @@ svboolx2_t test_svwhilele_b16_u64(uint64_t op1, uint64_t op2) ATTR { // CHECK-LABEL: define dso_local { , } @test_svwhilele_b32_s64( // CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilele.x2.nxv4i1(i64 [[OP1]], i64 [[OP2]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 // CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( [[TMP4]]) -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 2 -// CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CHECK-NEXT: ret { , } [[TMP7]] +// CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } [[TMP3]], [[TMP5]], 1 +// CHECK-NEXT: ret { , } [[TMP6]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z22test_svwhilele_b32_s64ll( // CPP-CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilele.x2.nxv4i1(i64 [[OP1]], i64 [[OP2]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( [[TMP1]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 // CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( [[TMP4]]) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: ret { , } [[TMP7]] +// CPP-CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } 
[[TMP3]], [[TMP5]], 1 +// CPP-CHECK-NEXT: ret { , } [[TMP6]] // svboolx2_t test_svwhilele_b32_s64(int64_t op1, int64_t op2) ATTR { return SVE_ACLE_FUNC(svwhilele_b32,_s64,_x2)(op1, op2); @@ -713,32 +539,26 @@ svboolx2_t test_svwhilele_b32_s64(int64_t op1, int64_t op2) ATTR { // CHECK-LABEL: define dso_local { , } @test_svwhilele_b32_u64( // CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilels.x2.nxv4i1(i64 [[OP1]], i64 [[OP2]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 // CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( [[TMP4]]) -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 2 -// CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CHECK-NEXT: ret { , } [[TMP7]] +// CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } [[TMP3]], [[TMP5]], 1 +// CHECK-NEXT: ret { , } [[TMP6]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z22test_svwhilele_b32_u64mm( // CPP-CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilels.x2.nxv4i1(i64 [[OP1]], i64 [[OP2]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( [[TMP1]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 // CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( [[TMP4]]) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: ret { , } [[TMP7]] +// CPP-CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } [[TMP3]], [[TMP5]], 1 +// CPP-CHECK-NEXT: ret { , } [[TMP6]] // svboolx2_t test_svwhilele_b32_u64(uint64_t op1, uint64_t op2) ATTR { return SVE_ACLE_FUNC(svwhilele_b32,_u64,_x2)(op1, op2); @@ -747,32 +567,26 @@ svboolx2_t test_svwhilele_b32_u64(uint64_t op1, uint64_t op2) ATTR { // CHECK-LABEL: define dso_local { , } @test_svwhilele_b64_s64( // CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilele.x2.nxv2i1(i64 [[OP1]], i64 [[OP2]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = 
insertvalue { , } poison, [[TMP2]], 0 // CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( [[TMP4]]) -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 2 -// CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CHECK-NEXT: ret { , } [[TMP7]] +// CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } [[TMP3]], [[TMP5]], 1 +// CHECK-NEXT: ret { , } [[TMP6]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z22test_svwhilele_b64_s64ll( // CPP-CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilele.x2.nxv2i1(i64 [[OP1]], i64 [[OP2]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( [[TMP1]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 // CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( [[TMP4]]) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: ret { , } [[TMP7]] +// CPP-CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } [[TMP3]], [[TMP5]], 1 +// CPP-CHECK-NEXT: ret { , } [[TMP6]] // svboolx2_t test_svwhilele_b64_s64(int64_t op1, int64_t op2) ATTR { return SVE_ACLE_FUNC(svwhilele_b64,_s64,_x2)(op1, op2); @@ -781,32 +595,26 @@ svboolx2_t test_svwhilele_b64_s64(int64_t op1, int64_t op2) ATTR { // CHECK-LABEL: define dso_local { , } @test_svwhilele_b64_u64( // CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilels.x2.nxv2i1(i64 [[OP1]], i64 [[OP2]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 // CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( [[TMP4]]) -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 2 -// CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CHECK-NEXT: ret { , } [[TMP7]] +// CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } [[TMP3]], [[TMP5]], 1 +// CHECK-NEXT: ret { , } [[TMP6]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z22test_svwhilele_b64_u64mm( // CPP-CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilels.x2.nxv2i1(i64 
[[OP1]], i64 [[OP2]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( [[TMP1]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 // CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( [[TMP4]]) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: ret { , } [[TMP7]] +// CPP-CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } [[TMP3]], [[TMP5]], 1 +// CPP-CHECK-NEXT: ret { , } [[TMP6]] // svboolx2_t test_svwhilele_b64_u64(uint64_t op1, uint64_t op2) ATTR { return SVE_ACLE_FUNC(svwhilele_b64,_u64,_x2)(op1, op2); @@ -815,28 +623,14 @@ svboolx2_t test_svwhilele_b64_u64(uint64_t op1, uint64_t op2) ATTR { // CHECK-LABEL: define dso_local { , } @test_svwhilelt_b8_s64( // CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilelt.x2.nxv16i1(i64 [[OP1]], i64 [[OP2]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 2 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z21test_svwhilelt_b8_s64ll( // CPP-CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilelt.x2.nxv16i1(i64 [[OP1]], i64 [[OP2]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svboolx2_t test_svwhilelt_b8_s64(int64_t op1, int64_t op2) ATTR { return SVE_ACLE_FUNC(svwhilelt_b8,_s64,_x2)(op1, op2); @@ -845,28 +639,14 @@ svboolx2_t test_svwhilelt_b8_s64(int64_t op1, int64_t op2) ATTR { // CHECK-LABEL: define dso_local { , } @test_svwhilelt_b8_u64( // CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilelo.x2.nxv16i1(i64 [[OP1]], i64 [[OP2]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call 
@llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 2 -// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CHECK-NEXT: ret { , } [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z21test_svwhilelt_b8_u64mm( // CPP-CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilelo.x2.nxv16i1(i64 [[OP1]], i64 [[OP2]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: ret { , } [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svboolx2_t test_svwhilelt_b8_u64(uint64_t op1, uint64_t op2) ATTR { return SVE_ACLE_FUNC(svwhilelt_b8,_u64,_x2)(op1, op2); @@ -875,32 +655,26 @@ svboolx2_t test_svwhilelt_b8_u64(uint64_t op1, uint64_t op2) ATTR { // CHECK-LABEL: define dso_local { , } @test_svwhilelt_b16_s64( // CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilelt.x2.nxv8i1(i64 [[OP1]], i64 [[OP2]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 // CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP4]]) -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 2 -// CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CHECK-NEXT: ret { , } [[TMP7]] +// CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } [[TMP3]], [[TMP5]], 1 +// CHECK-NEXT: ret { , } [[TMP6]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z22test_svwhilelt_b16_s64ll( // CPP-CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilelt.x2.nxv8i1(i64 [[OP1]], i64 [[OP2]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP1]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 // CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call 
@llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP4]]) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: ret { , } [[TMP7]] +// CPP-CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } [[TMP3]], [[TMP5]], 1 +// CPP-CHECK-NEXT: ret { , } [[TMP6]] // svboolx2_t test_svwhilelt_b16_s64(int64_t op1, int64_t op2) ATTR { return SVE_ACLE_FUNC(svwhilelt_b16,_s64,_x2)(op1, op2); @@ -909,32 +683,26 @@ svboolx2_t test_svwhilelt_b16_s64(int64_t op1, int64_t op2) ATTR { // CHECK-LABEL: define dso_local { , } @test_svwhilelt_b16_u64( // CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilelo.x2.nxv8i1(i64 [[OP1]], i64 [[OP2]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 // CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP4]]) -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 2 -// CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CHECK-NEXT: ret { , } [[TMP7]] +// CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } [[TMP3]], [[TMP5]], 1 +// CHECK-NEXT: ret { , } [[TMP6]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z22test_svwhilelt_b16_u64mm( // CPP-CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilelo.x2.nxv8i1(i64 [[OP1]], i64 [[OP2]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP1]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 // CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP4]]) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: ret { , } [[TMP7]] +// CPP-CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } [[TMP3]], [[TMP5]], 1 +// CPP-CHECK-NEXT: ret { , } [[TMP6]] // svboolx2_t test_svwhilelt_b16_u64(uint64_t op1, uint64_t op2) ATTR { return SVE_ACLE_FUNC(svwhilelt_b16,_u64,_x2)(op1, op2); @@ -943,32 +711,26 @@ svboolx2_t test_svwhilelt_b16_u64(uint64_t op1, uint64_t op2) ATTR { // CHECK-LABEL: define dso_local { , } @test_svwhilelt_b32_s64( // CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CHECK-NEXT: 
[[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilelt.x2.nxv4i1(i64 [[OP1]], i64 [[OP2]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 // CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( [[TMP4]]) -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 2 -// CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CHECK-NEXT: ret { , } [[TMP7]] +// CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } [[TMP3]], [[TMP5]], 1 +// CHECK-NEXT: ret { , } [[TMP6]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z22test_svwhilelt_b32_s64ll( // CPP-CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilelt.x2.nxv4i1(i64 [[OP1]], i64 [[OP2]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( [[TMP1]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 // CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( [[TMP4]]) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: ret { , } [[TMP7]] +// CPP-CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } [[TMP3]], [[TMP5]], 1 +// CPP-CHECK-NEXT: ret { , } [[TMP6]] // svboolx2_t test_svwhilelt_b32_s64(int64_t op1, int64_t op2) ATTR { return SVE_ACLE_FUNC(svwhilelt_b32,_s64,_x2)(op1, op2); @@ -977,32 +739,26 @@ svboolx2_t test_svwhilelt_b32_s64(int64_t op1, int64_t op2) ATTR { // CHECK-LABEL: define dso_local { , } @test_svwhilelt_b32_u64( // CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilelo.x2.nxv4i1(i64 [[OP1]], i64 [[OP2]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 // CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( [[TMP4]]) -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 2 -// CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CHECK-NEXT: ret { , } [[TMP7]] +// CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } [[TMP3]], 
[[TMP5]], 1 +// CHECK-NEXT: ret { , } [[TMP6]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z22test_svwhilelt_b32_u64mm( // CPP-CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilelo.x2.nxv4i1(i64 [[OP1]], i64 [[OP2]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( [[TMP1]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 // CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( [[TMP4]]) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: ret { , } [[TMP7]] +// CPP-CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } [[TMP3]], [[TMP5]], 1 +// CPP-CHECK-NEXT: ret { , } [[TMP6]] // svboolx2_t test_svwhilelt_b32_u64(uint64_t op1, uint64_t op2) ATTR { return SVE_ACLE_FUNC(svwhilelt_b32,_u64,_x2)(op1, op2); @@ -1011,32 +767,26 @@ svboolx2_t test_svwhilelt_b32_u64(uint64_t op1, uint64_t op2) ATTR { // CHECK-LABEL: define dso_local { , } @test_svwhilelt_b64_s64( // CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilelt.x2.nxv2i1(i64 [[OP1]], i64 [[OP2]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 // CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( [[TMP4]]) -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 2 -// CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CHECK-NEXT: ret { , } [[TMP7]] +// CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } [[TMP3]], [[TMP5]], 1 +// CHECK-NEXT: ret { , } [[TMP6]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z22test_svwhilelt_b64_s64ll( // CPP-CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilelt.x2.nxv2i1(i64 [[OP1]], i64 [[OP2]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( [[TMP1]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 // CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( 
[[TMP4]]) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: ret { , } [[TMP7]] +// CPP-CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } [[TMP3]], [[TMP5]], 1 +// CPP-CHECK-NEXT: ret { , } [[TMP6]] // svboolx2_t test_svwhilelt_b64_s64(int64_t op1, int64_t op2) ATTR { return SVE_ACLE_FUNC(svwhilelt_b64,_s64,_x2)(op1, op2); @@ -1045,32 +795,26 @@ svboolx2_t test_svwhilelt_b64_s64(int64_t op1, int64_t op2) ATTR { // CHECK-LABEL: define dso_local { , } @test_svwhilelt_b64_u64( // CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilelo.x2.nxv2i1(i64 [[OP1]], i64 [[OP2]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 // CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( [[TMP4]]) -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 2 -// CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CHECK-NEXT: ret { , } [[TMP7]] +// CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } [[TMP3]], [[TMP5]], 1 +// CHECK-NEXT: ret { , } [[TMP6]] // // CPP-CHECK-LABEL: define dso_local { , } @_Z22test_svwhilelt_b64_u64mm( // CPP-CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilelo.x2.nxv2i1(i64 [[OP1]], i64 [[OP2]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( [[TMP1]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 // CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( [[TMP4]]) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 2 -// CPP-CHECK-NEXT: ret { , } [[TMP7]] +// CPP-CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , } [[TMP3]], [[TMP5]], 1 +// CPP-CHECK-NEXT: ret { , } [[TMP6]] // svboolx2_t test_svwhilelt_b64_u64(uint64_t op1, uint64_t op2) ATTR { return SVE_ACLE_FUNC(svwhilelt_b64,_u64,_x2)(op1, op2); From ce6c236c965dc1bb5fa2257e17ea253a015705cc Mon Sep 17 00:00:00 2001 From: Victor Campos Date: Wed, 25 Sep 2024 11:23:58 +0100 Subject: [PATCH 140/212] [ADT][NFC] Simplify SmallSet (#109412) - Remove dependence on `STLExtras.h`. - Remove unused header inclusions. - Make `count` use `contains` for deduplication. 
- Replace hand-written linear scans on Vector by `std::find`. --- clang/lib/Basic/TargetID.cpp | 1 + llvm/include/llvm/ADT/SmallSet.h | 37 +++++++------------------------- 2 files changed, 9 insertions(+), 29 deletions(-) diff --git a/clang/lib/Basic/TargetID.cpp b/clang/lib/Basic/TargetID.cpp index 3c06d9bad1dc0..fa1bfec2aacb9 100644 --- a/clang/lib/Basic/TargetID.cpp +++ b/clang/lib/Basic/TargetID.cpp @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// #include "clang/Basic/TargetID.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallSet.h" #include "llvm/Support/raw_ostream.h" #include "llvm/TargetParser/TargetParser.h" diff --git a/llvm/include/llvm/ADT/SmallSet.h b/llvm/include/llvm/ADT/SmallSet.h index 630c98504261a..8d7511bf0bc8d 100644 --- a/llvm/include/llvm/ADT/SmallSet.h +++ b/llvm/include/llvm/ADT/SmallSet.h @@ -16,14 +16,10 @@ #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/iterator.h" -#include "llvm/Support/Compiler.h" -#include "llvm/Support/type_traits.h" #include #include #include -#include #include namespace llvm { @@ -139,10 +135,6 @@ class SmallSet { SmallVector Vector; std::set Set; - using VIterator = typename SmallVector::const_iterator; - using SIterator = typename std::set::const_iterator; - using mutable_iterator = typename SmallVector::iterator; - // In small mode SmallPtrSet uses linear search for the elements, so it is // not a good idea to choose this value too high. You may consider using a // DenseSet<> instead if you expect many elements in the set. @@ -163,13 +155,7 @@ class SmallSet { } /// count - Return 1 if the element is in the set, 0 otherwise. - size_type count(const T &V) const { - if (isSmall()) { - // Since the collection is small, just do a linear search. - return vfind(V) == Vector.end() ? 0 : 1; - } - return Set.count(V); - } + size_type count(const T &V) const { return contains(V) ? 1 : 0; } /// insert - Insert an element into the set if it isn't already there. /// Returns a pair. The first value of it is an iterator to the inserted @@ -181,7 +167,7 @@ class SmallSet { return std::make_pair(const_iterator(I), Inserted); } - VIterator I = vfind(V); + auto I = std::find(Vector.begin(), Vector.end(), V); if (I != Vector.end()) // Don't reinsert if it already exists. return std::make_pair(const_iterator(I), false); if (Vector.size() < N) { @@ -206,11 +192,11 @@ class SmallSet { bool erase(const T &V) { if (!isSmall()) return Set.erase(V); - for (mutable_iterator I = Vector.begin(), E = Vector.end(); I != E; ++I) - if (*I == V) { - Vector.erase(I); - return true; - } + auto I = std::find(Vector.begin(), Vector.end(), V); + if (I != Vector.end()) { + Vector.erase(I); + return true; + } return false; } @@ -234,19 +220,12 @@ class SmallSet { /// Check if the SmallSet contains the given element. 
bool contains(const T &V) const {
     if (isSmall())
-      return vfind(V) != Vector.end();
+      return std::find(Vector.begin(), Vector.end(), V) != Vector.end();
     return Set.find(V) != Set.end();
   }
 
 private:
   bool isSmall() const { return Set.empty(); }
-
-  VIterator vfind(const T &V) const {
-    for (VIterator I = Vector.begin(), E = Vector.end(); I != E; ++I)
-      if (*I == V)
-        return I;
-    return Vector.end();
-  }
 };
 
 /// If this set is of pointer values, transparently switch over to using

From e4688b98cd2b86035a2b563a8db0819710d6275a Mon Sep 17 00:00:00 2001
From: Chengjun
Date: Wed, 25 Sep 2024 03:41:13 -0700
Subject: [PATCH 141/212] [SimplifyCFG] Avoid increasing too many phi entries
 when removing empty blocks (#104887)

The simplifycfg and jumpthreading passes remove empty blocks (blocks
that contain only phis and an unconditional branch). However, in some
cases this can increase the size of the IR and dramatically slow down
later passes. For example, consider the following CFG:

1. BB1 has 100 predecessors and unconditionally branches to BB2 (it
contains no other instructions).
2. BB2 has 100 phis.

If we remove BB1, then every phi in BB2 gains 99 entries (the single
incoming edge from BB1 is replaced with 100 edges from its
predecessors). In total, this adds 9900 phi entries, which can slow
down compilation in many other passes.

Therefore, this change adds a check for whether removing an empty block
would introduce too many phi entries. The threshold is 1000 (controlled
by the command line option
`max-phi-entries-increase-after-removing-empty-block`): an empty block
is not removed if doing so would increase the total number of phi
entries by more than 1000. This threshold is conservative; most code
has no phis anywhere near this large, so the check only triggers on
unusual IR.
---
 llvm/lib/Transforms/Utils/Local.cpp           |  35 +++-
 .../SimplifyCFG/avoid-complex-phi.ll          | 164 ++++++++++++++++++
 2 files changed, 198 insertions(+), 1 deletion(-)
 create mode 100644 llvm/test/Transforms/SimplifyCFG/avoid-complex-phi.ll

diff --git a/llvm/lib/Transforms/Utils/Local.cpp b/llvm/lib/Transforms/Utils/Local.cpp
index 725b512fb86e7..7659fc6919615 100644
--- a/llvm/lib/Transforms/Utils/Local.cpp
+++ b/llvm/lib/Transforms/Utils/Local.cpp
@@ -112,6 +112,12 @@ static cl::opt<unsigned> PHICSENumPHISmallSize(
         "When the basic block contains not more than this number of PHI nodes, "
         "perform a (faster!) exhaustive search instead of set-driven one."));
 
+static cl::opt<unsigned> MaxPhiEntriesIncreaseAfterRemovingEmptyBlock(
+    "max-phi-entries-increase-after-removing-empty-block", cl::init(1000),
+    cl::Hidden,
+    cl::desc("Stop removing an empty block if removing it will introduce more "
+             "than this number of phi entries in its successor"));
+
 // Max recursion depth for collectBitParts used when detecting bswap and
 // bitreverse idioms.
 static const unsigned BitPartRecursionMaxDepth = 48;
@@ -1047,6 +1053,33 @@ CanRedirectPredsOfEmptyBBToSucc(BasicBlock *BB, BasicBlock *Succ,
   return true;
 }
 
+/// Check whether removing \p BB will make the phis in its \p Succ have too
+/// many incoming entries. This function does not check whether \p BB is
+/// foldable or not.
+static bool introduceTooManyPhiEntries(BasicBlock *BB, BasicBlock *Succ) {
+  // If BB only has one predecessor, then removing it will not introduce more
+  // incoming edges for phis.
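+  // Illustration from the commit description: a block with 100 predecessors
+  // whose successor has 100 phis that all need updating would add
+  // (100 - 1) * 100 = 9900 new phi entries if removed.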
+  if (BB->hasNPredecessors(1))
+    return false;
+
+  unsigned NumPreds = pred_size(BB);
+  unsigned NumChangedPhi = 0;
+  for (auto &Phi : Succ->phis()) {
+    // If the incoming value is a phi and the phi is defined in BB,
+    // then removing BB will not increase the total phi entries of the IR.
+    if (auto *IncomingPhi =
+            dyn_cast<PHINode>(Phi.getIncomingValueForBlock(BB)))
+      if (IncomingPhi->getParent() == BB)
+        continue;
+    // Otherwise, we need to add entries to the phi.
+    NumChangedPhi++;
+  }
+  // For every phi that needs to be changed, (NumPreds - 1) new entries will be
+  // added. If the total increase in phi entries exceeds
+  // MaxPhiEntriesIncreaseAfterRemovingEmptyBlock, it will be considered as
+  // introducing too many new phi entries.
+  return (NumPreds - 1) * NumChangedPhi >
+         MaxPhiEntriesIncreaseAfterRemovingEmptyBlock;
+}
+
 /// Replace a value flowing from a block to a phi with
 /// potentially multiple instances of that value flowing from the
 /// block's predecessors to the phi.
@@ -1146,7 +1179,7 @@ bool llvm::TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB,
       BBKillable ||
       CanRedirectPredsOfEmptyBBToSucc(BB, Succ, BBPreds, SuccPreds, CommonPred);
 
-  if (!BBKillable && !BBPhisMergeable)
+  if ((!BBKillable && !BBPhisMergeable) || introduceTooManyPhiEntries(BB, Succ))
     return false;
 
   // Check to see if merging these blocks/phis would cause conflicts for any of
diff --git a/llvm/test/Transforms/SimplifyCFG/avoid-complex-phi.ll b/llvm/test/Transforms/SimplifyCFG/avoid-complex-phi.ll
new file mode 100644
index 0000000000000..c24fae7aa67bb
--- /dev/null
+++ b/llvm/test/Transforms/SimplifyCFG/avoid-complex-phi.ll
@@ -0,0 +1,164 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt < %s -max-phi-entries-increase-after-removing-empty-block=12 -passes=simplifycfg -S | FileCheck --check-prefixes=CHECK-12 %s
+; RUN: opt < %s -max-phi-entries-increase-after-removing-empty-block=11 -passes=simplifycfg -S | FileCheck --check-prefixes=CHECK-11 %s
+; RUN: opt < %s -max-phi-entries-increase-after-removing-empty-block=4 -passes=simplifycfg -S | FileCheck --check-prefixes=CHECK-4 %s
+;
+; This test has the following CFG:
+; 1. entry has a switch to 4 blocks: B1 - B4
+; 2. B1 and B2 each conditionally branch to B6 or B5
+; 3. B3 and B4 each conditionally branch to B7 or B5
+; 4. In B5, %val is defined as a phi taking values from B1 - B4
+; 5. B5, B6 and B7 branch to block Merge unconditionally
+; 6. Block Merge has 5 phis (%x1 - %x4 and %val_merge).
+;
+; If we remove B5, each of %x1 - %x4 gains (4 - 1) entries, increasing the number of phi entries by
+; (4 - 1) * 4 = 12. For %val_merge, since the value taken from B5 is defined in B5, removing B5 does not increase
+; the number of phi entries (it effectively moves the entries from %val to %val_merge). Therefore, removing B5
+; increases the number of phi entries by 12, not (4 - 1) * 5 = 15.
+;
+; Removing B6 or B7 increases the number of phi entries by (2 - 1) * 5 = 5.
+;
+; In the first run, max-phi-entries-increase-after-removing-empty-block is set to 12, so B5 is removed.
+; In the second run, it is set to 11, so B5 must not be removed, but B6 and B7 can still be removed.
+; In the third run, it is set to 4, so no block can be removed.
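+; (The increase is computed in introduceTooManyPhiEntries in Local.cpp as (NumPreds - 1) * NumChangedPhi,
+; counting only the phis in Merge that actually need new entries.)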
+; +define void @foo(i32 %a, i32 %val1, i32 %val2, i32 %val3, i32 %val4) { +; CHECK-12-LABEL: define void @foo( +; CHECK-12-SAME: i32 [[A:%.*]], i32 [[VAL1:%.*]], i32 [[VAL2:%.*]], i32 [[VAL3:%.*]], i32 [[VAL4:%.*]]) { +; CHECK-12-NEXT: [[ENTRY:.*:]] +; CHECK-12-NEXT: switch i32 [[A]], label %[[B1:.*]] [ +; CHECK-12-NEXT: i32 4, label %[[B4:.*]] +; CHECK-12-NEXT: i32 2, label %[[B2:.*]] +; CHECK-12-NEXT: i32 3, label %[[B3:.*]] +; CHECK-12-NEXT: ] +; CHECK-12: [[B1]]: +; CHECK-12-NEXT: [[CMP1:%.*]] = icmp eq i32 [[VAL1]], 1 +; CHECK-12-NEXT: br i1 [[CMP1]], label %[[B6:.*]], label %[[MERGE:.*]] +; CHECK-12: [[B2]]: +; CHECK-12-NEXT: [[CMP2:%.*]] = icmp eq i32 [[VAL2]], 2 +; CHECK-12-NEXT: br i1 [[CMP2]], label %[[B6]], label %[[MERGE]] +; CHECK-12: [[B3]]: +; CHECK-12-NEXT: [[CMP3:%.*]] = icmp eq i32 [[VAL3]], 3 +; CHECK-12-NEXT: br i1 [[CMP3]], label %[[B7:.*]], label %[[MERGE]] +; CHECK-12: [[B4]]: +; CHECK-12-NEXT: [[CMP4:%.*]] = icmp eq i32 [[VAL4]], 4 +; CHECK-12-NEXT: br i1 [[CMP4]], label %[[B7]], label %[[MERGE]] +; CHECK-12: [[B6]]: +; CHECK-12-NEXT: br label %[[MERGE]] +; CHECK-12: [[B7]]: +; CHECK-12-NEXT: br label %[[MERGE]] +; CHECK-12: [[MERGE]]: +; CHECK-12-NEXT: [[X1:%.*]] = phi i16 [ 0, %[[B6]] ], [ 2, %[[B7]] ], [ 1, %[[B4]] ], [ 1, %[[B3]] ], [ 1, %[[B2]] ], [ 1, %[[B1]] ] +; CHECK-12-NEXT: [[X2:%.*]] = phi i16 [ 0, %[[B6]] ], [ 2, %[[B7]] ], [ 2, %[[B4]] ], [ 2, %[[B3]] ], [ 2, %[[B2]] ], [ 2, %[[B1]] ] +; CHECK-12-NEXT: [[X3:%.*]] = phi i16 [ 0, %[[B6]] ], [ 2, %[[B7]] ], [ 3, %[[B4]] ], [ 3, %[[B3]] ], [ 3, %[[B2]] ], [ 3, %[[B1]] ] +; CHECK-12-NEXT: [[X4:%.*]] = phi i16 [ 0, %[[B6]] ], [ 2, %[[B7]] ], [ 4, %[[B4]] ], [ 4, %[[B3]] ], [ 4, %[[B2]] ], [ 4, %[[B1]] ] +; CHECK-12-NEXT: [[VAL_MERGE:%.*]] = phi i32 [ 0, %[[B6]] ], [ 2, %[[B7]] ], [ [[VAL1]], %[[B1]] ], [ [[VAL2]], %[[B2]] ], [ [[VAL3]], %[[B3]] ], [ [[VAL4]], %[[B4]] ] +; CHECK-12-NEXT: ret void +; +; CHECK-11-LABEL: define void @foo( +; CHECK-11-SAME: i32 [[A:%.*]], i32 [[VAL1:%.*]], i32 [[VAL2:%.*]], i32 [[VAL3:%.*]], i32 [[VAL4:%.*]]) { +; CHECK-11-NEXT: [[ENTRY:.*:]] +; CHECK-11-NEXT: switch i32 [[A]], label %[[B1:.*]] [ +; CHECK-11-NEXT: i32 4, label %[[B4:.*]] +; CHECK-11-NEXT: i32 2, label %[[B2:.*]] +; CHECK-11-NEXT: i32 3, label %[[B3:.*]] +; CHECK-11-NEXT: ] +; CHECK-11: [[B1]]: +; CHECK-11-NEXT: [[CMP1:%.*]] = icmp eq i32 [[VAL1]], 1 +; CHECK-11-NEXT: br i1 [[CMP1]], label %[[MERGE:.*]], label %[[B5:.*]] +; CHECK-11: [[B2]]: +; CHECK-11-NEXT: [[CMP2:%.*]] = icmp eq i32 [[VAL2]], 2 +; CHECK-11-NEXT: br i1 [[CMP2]], label %[[MERGE]], label %[[B5]] +; CHECK-11: [[B3]]: +; CHECK-11-NEXT: [[CMP3:%.*]] = icmp eq i32 [[VAL3]], 3 +; CHECK-11-NEXT: br i1 [[CMP3]], label %[[MERGE]], label %[[B5]] +; CHECK-11: [[B4]]: +; CHECK-11-NEXT: [[CMP4:%.*]] = icmp eq i32 [[VAL4]], 4 +; CHECK-11-NEXT: br i1 [[CMP4]], label %[[MERGE]], label %[[B5]] +; CHECK-11: [[B5]]: +; CHECK-11-NEXT: [[VAL:%.*]] = phi i32 [ [[VAL1]], %[[B1]] ], [ [[VAL2]], %[[B2]] ], [ [[VAL3]], %[[B3]] ], [ [[VAL4]], %[[B4]] ] +; CHECK-11-NEXT: br label %[[MERGE]] +; CHECK-11: [[MERGE]]: +; CHECK-11-NEXT: [[X1:%.*]] = phi i16 [ 1, %[[B5]] ], [ 0, %[[B2]] ], [ 0, %[[B1]] ], [ 2, %[[B4]] ], [ 2, %[[B3]] ] +; CHECK-11-NEXT: [[X2:%.*]] = phi i16 [ 2, %[[B5]] ], [ 0, %[[B2]] ], [ 0, %[[B1]] ], [ 2, %[[B4]] ], [ 2, %[[B3]] ] +; CHECK-11-NEXT: [[X3:%.*]] = phi i16 [ 3, %[[B5]] ], [ 0, %[[B2]] ], [ 0, %[[B1]] ], [ 2, %[[B4]] ], [ 2, %[[B3]] ] +; CHECK-11-NEXT: [[X4:%.*]] = phi i16 [ 4, %[[B5]] ], [ 0, %[[B2]] ], [ 0, %[[B1]] ], [ 2, %[[B4]] ], [ 2, 
%[[B3]] ] +; CHECK-11-NEXT: [[VAL_MERGE:%.*]] = phi i32 [ [[VAL]], %[[B5]] ], [ 0, %[[B2]] ], [ 0, %[[B1]] ], [ 2, %[[B4]] ], [ 2, %[[B3]] ] +; CHECK-11-NEXT: ret void +; +; CHECK-4-LABEL: define void @foo( +; CHECK-4-SAME: i32 [[A:%.*]], i32 [[VAL1:%.*]], i32 [[VAL2:%.*]], i32 [[VAL3:%.*]], i32 [[VAL4:%.*]]) { +; CHECK-4-NEXT: [[ENTRY:.*:]] +; CHECK-4-NEXT: switch i32 [[A]], label %[[B1:.*]] [ +; CHECK-4-NEXT: i32 4, label %[[B4:.*]] +; CHECK-4-NEXT: i32 2, label %[[B2:.*]] +; CHECK-4-NEXT: i32 3, label %[[B3:.*]] +; CHECK-4-NEXT: ] +; CHECK-4: [[B1]]: +; CHECK-4-NEXT: [[CMP1:%.*]] = icmp eq i32 [[VAL1]], 1 +; CHECK-4-NEXT: br i1 [[CMP1]], label %[[B6:.*]], label %[[B5:.*]] +; CHECK-4: [[B2]]: +; CHECK-4-NEXT: [[CMP2:%.*]] = icmp eq i32 [[VAL2]], 2 +; CHECK-4-NEXT: br i1 [[CMP2]], label %[[B6]], label %[[B5]] +; CHECK-4: [[B3]]: +; CHECK-4-NEXT: [[CMP3:%.*]] = icmp eq i32 [[VAL3]], 3 +; CHECK-4-NEXT: br i1 [[CMP3]], label %[[B7:.*]], label %[[B5]] +; CHECK-4: [[B4]]: +; CHECK-4-NEXT: [[CMP4:%.*]] = icmp eq i32 [[VAL4]], 4 +; CHECK-4-NEXT: br i1 [[CMP4]], label %[[B7]], label %[[B5]] +; CHECK-4: [[B5]]: +; CHECK-4-NEXT: [[VAL:%.*]] = phi i32 [ [[VAL1]], %[[B1]] ], [ [[VAL2]], %[[B2]] ], [ [[VAL3]], %[[B3]] ], [ [[VAL4]], %[[B4]] ] +; CHECK-4-NEXT: br label %[[MERGE:.*]] +; CHECK-4: [[B6]]: +; CHECK-4-NEXT: br label %[[MERGE]] +; CHECK-4: [[B7]]: +; CHECK-4-NEXT: br label %[[MERGE]] +; CHECK-4: [[MERGE]]: +; CHECK-4-NEXT: [[X1:%.*]] = phi i16 [ 1, %[[B5]] ], [ 0, %[[B6]] ], [ 2, %[[B7]] ] +; CHECK-4-NEXT: [[X2:%.*]] = phi i16 [ 2, %[[B5]] ], [ 0, %[[B6]] ], [ 2, %[[B7]] ] +; CHECK-4-NEXT: [[X3:%.*]] = phi i16 [ 3, %[[B5]] ], [ 0, %[[B6]] ], [ 2, %[[B7]] ] +; CHECK-4-NEXT: [[X4:%.*]] = phi i16 [ 4, %[[B5]] ], [ 0, %[[B6]] ], [ 2, %[[B7]] ] +; CHECK-4-NEXT: [[VAL_MERGE:%.*]] = phi i32 [ [[VAL]], %[[B5]] ], [ 0, %[[B6]] ], [ 2, %[[B7]] ] +; CHECK-4-NEXT: ret void +; +entry: + switch i32 %a, label %B1 [ + i32 4, label %B4 + i32 2, label %B2 + i32 3, label %B3 + ] + +B1: ; preds = %entry + %cmp1 = icmp eq i32 %val1, 1 + br i1 %cmp1, label %B6, label %B5 + +B2: ; preds = %entry + %cmp2 = icmp eq i32 %val2, 2 + br i1 %cmp2, label %B6, label %B5 + +B3: ; preds = %entry + %cmp3 = icmp eq i32 %val3, 3 + br i1 %cmp3, label %B7, label %B5 + +B4: ; preds = %entry + %cmp4 = icmp eq i32 %val4, 4 + br i1 %cmp4, label %B7, label %B5 + +B5: ; preds = %B4, %B3, %B2, %B1 + %val = phi i32 [ %val1, %B1 ], [ %val2, %B2 ], [ %val3, %B3 ], [ %val4, %B4 ] + br label %Merge + +B6: ; preds = %B2, %B1 + br label %Merge + +B7: ; preds = %B4, %B3 + br label %Merge + +Merge: ; preds = %B7, %B6, %B5 + %x1 = phi i16 [ 1, %B5 ], [ 0, %B6 ], [ 2, %B7 ] + %x2 = phi i16 [ 2, %B5 ], [ 0, %B6 ], [ 2, %B7 ] + %x3 = phi i16 [ 3, %B5 ], [ 0, %B6 ], [ 2, %B7 ] + %x4 = phi i16 [ 4, %B5 ], [ 0, %B6 ], [ 2, %B7 ] + %val_merge = phi i32 [ %val, %B5 ], [ 0, %B6 ], [ 2, %B7 ] + ret void +} From de70b959b152a071c3d788492a3a37470163af55 Mon Sep 17 00:00:00 2001 From: Pierre van Houtryve Date: Wed, 25 Sep 2024 12:42:57 +0200 Subject: [PATCH 142/212] [AMDGPU] Fix typo in promoteUniformOpToI32 (#109942) --- llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index 08f2ff4566b67..2464361d4eece 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -6797,8 +6797,7 @@ SDValue SITargetLowering::promoteUniformOpToI32(SDValue Op, LHS = DAG.getNode(ExtOp, DL, 
ExtTy, {LHS}); // Special case: for shifts, the RHS always needs a zext. - if (Op.getOpcode() == ISD::SRA || Op.getOpcode() == ISD::SRL || - Op.getOpcode() == ISD::SRA) + if (Opc == ISD::SHL || Opc == ISD::SRL || Opc == ISD::SRA) RHS = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtTy, {RHS}); else RHS = DAG.getNode(ExtOp, DL, ExtTy, {RHS}); From 8ea0dbab2e623df499bdce122394ed9bcfe2172e Mon Sep 17 00:00:00 2001 From: Mehdi Amini Date: Wed, 25 Sep 2024 03:48:46 -0700 Subject: [PATCH 143/212] [mlir] Remove spurious CMake dependencies for convert-vector-to-llvm (NFC) These don't seem used by this pass. --- mlir/lib/Conversion/VectorToLLVM/CMakeLists.txt | 3 --- 1 file changed, 3 deletions(-) diff --git a/mlir/lib/Conversion/VectorToLLVM/CMakeLists.txt b/mlir/lib/Conversion/VectorToLLVM/CMakeLists.txt index aef3cf467fb65..35576732c82cf 100644 --- a/mlir/lib/Conversion/VectorToLLVM/CMakeLists.txt +++ b/mlir/lib/Conversion/VectorToLLVM/CMakeLists.txt @@ -35,12 +35,9 @@ add_mlir_conversion_library(MLIRVectorToLLVMPass MLIRVectorToLLVM MLIRArmNeonDialect - MLIRArmNeonTransforms MLIRArmSMEDialect - MLIRArmSMETransforms MLIRArmSVEDialect MLIRArmSVETransforms - MLIRVectorToArmSME MLIRAMXDialect MLIRAMXTransforms MLIRX86VectorDialect From 63b534be1765391d102464d26208eef3510fd62d Mon Sep 17 00:00:00 2001 From: Luke Lau Date: Wed, 25 Sep 2024 18:49:52 +0800 Subject: [PATCH 144/212] [RISCV] Fold vmv.x.s into load from stack (#109774) If a vector is reloaded from the stack to be used in vmv.x.s, we can tell foldMemoryOperandImpl to fold it into a scalar load. If XLEN < SEW then this currently just bails. I couldn't think of a way to express a vmv.x.s that truncates in LLVM IR. --- llvm/lib/Target/RISCV/RISCVInstrInfo.cpp | 23 +++ llvm/test/CodeGen/RISCV/rvv/stack-folding.ll | 162 +++++++++++++++++++ 2 files changed, 185 insertions(+) create mode 100644 llvm/test/CodeGen/RISCV/rvv/stack-folding.ll diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp index b594531ccb095..8dafd824963c0 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp @@ -761,6 +761,29 @@ MachineInstr *RISCVInstrInfo::foldMemoryOperandImpl( LoadOpc = RISCV::LBU; break; } + if (RISCV::getRVVMCOpcode(MI.getOpcode()) == RISCV::VMV_X_S) { + unsigned Log2SEW = + MI.getOperand(RISCVII::getSEWOpNum(MI.getDesc())).getImm(); + if (STI.getXLen() < (1 << Log2SEW)) + return nullptr; + switch (Log2SEW) { + case 3: + LoadOpc = RISCV::LB; + break; + case 4: + LoadOpc = RISCV::LH; + break; + case 5: + LoadOpc = RISCV::LW; + break; + case 6: + LoadOpc = RISCV::LD; + break; + default: + llvm_unreachable("Unexpected SEW"); + } + break; + } return nullptr; case RISCV::SEXT_H: LoadOpc = RISCV::LH; diff --git a/llvm/test/CodeGen/RISCV/rvv/stack-folding.ll b/llvm/test/CodeGen/RISCV/rvv/stack-folding.ll new file mode 100644 index 0000000000000..4771d7fe6ec92 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/stack-folding.ll @@ -0,0 +1,162 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc < %s -mtriple=riscv32 -mattr=+v -verify-machineinstrs | FileCheck --check-prefixes=CHECK,RV32 %s +; RUN: llc < %s -mtriple=riscv64 -mattr=+v -verify-machineinstrs | FileCheck --check-prefixes=CHECK,RV64 %s + +define i64 @i64( %v, i1 %c) { +; RV32-LABEL: i64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 1 +; RV32-NEXT: sub sp, sp, a1 +; 
RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; RV32-NEXT: addi a1, sp, 16 +; RV32-NEXT: vs1r.v v8, (a1) # Unknown-size Folded Spill +; RV32-NEXT: andi a0, a0, 1 +; RV32-NEXT: #APP +; RV32-NEXT: #NO_APP +; RV32-NEXT: beqz a0, .LBB0_2 +; RV32-NEXT: # %bb.1: # %truebb +; RV32-NEXT: li a0, 32 +; RV32-NEXT: vl1r.v v9, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; RV32-NEXT: vsrl.vx v8, v9, a0 +; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: vmv.x.s a0, v9 +; RV32-NEXT: j .LBB0_3 +; RV32-NEXT: .LBB0_2: # %falsebb +; RV32-NEXT: li a1, 0 +; RV32-NEXT: .LBB0_3: # %falsebb +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 1 +; RV32-NEXT: add sp, sp, a2 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: i64: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -16 +; RV64-NEXT: .cfi_def_cfa_offset 16 +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 1 +; RV64-NEXT: sub sp, sp, a1 +; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; RV64-NEXT: addi a1, sp, 16 +; RV64-NEXT: vs1r.v v8, (a1) # Unknown-size Folded Spill +; RV64-NEXT: andi a0, a0, 1 +; RV64-NEXT: #APP +; RV64-NEXT: #NO_APP +; RV64-NEXT: beqz a0, .LBB0_2 +; RV64-NEXT: # %bb.1: # %truebb +; RV64-NEXT: ld a0, 16(sp) # 8-byte Folded Reload +; RV64-NEXT: .LBB0_2: # %falsebb +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 1 +; RV64-NEXT: add sp, sp, a1 +; RV64-NEXT: addi sp, sp, 16 +; RV64-NEXT: ret + tail call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"() + br i1 %c, label %truebb, label %falsebb +truebb: + %x = extractelement %v, i32 0 + ret i64 %x +falsebb: + ret i64 0 +} + +define i32 @i32( %v, i1 %c) { +; CHECK-LABEL: i32: +; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs1r.v v8, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: andi a0, a0, 1 +; CHECK-NEXT: #APP +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: beqz a0, .LBB1_2 +; CHECK-NEXT: # %bb.1: # %truebb +; CHECK-NEXT: lw a0, 16(sp) # 8-byte Folded Reload +; CHECK-NEXT: .LBB1_2: # %falsebb +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: add sp, sp, a1 +; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: ret + tail call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"() + br i1 %c, label %truebb, label %falsebb +truebb: + %x = extractelement %v, i32 0 + ret i32 %x +falsebb: + ret i32 0 +} + +define i16 @i16( %v, i1 %c) { +; CHECK-LABEL: i16: +; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; 
CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs1r.v v8, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: andi a0, a0, 1 +; CHECK-NEXT: #APP +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: beqz a0, .LBB2_2 +; CHECK-NEXT: # %bb.1: # %truebb +; CHECK-NEXT: lh a0, 16(sp) # 8-byte Folded Reload +; CHECK-NEXT: .LBB2_2: # %falsebb +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: add sp, sp, a1 +; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: ret + tail call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"() + br i1 %c, label %truebb, label %falsebb +truebb: + %x = extractelement %v, i32 0 + ret i16 %x +falsebb: + ret i16 0 +} + +define i8 @i8( %v, i1 %c) { +; CHECK-LABEL: i8: +; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs1r.v v8, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: andi a0, a0, 1 +; CHECK-NEXT: #APP +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: beqz a0, .LBB3_2 +; CHECK-NEXT: # %bb.1: # %truebb +; CHECK-NEXT: lb a0, 16(sp) # 8-byte Folded Reload +; CHECK-NEXT: .LBB3_2: # %falsebb +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: add sp, sp, a1 +; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: ret + tail call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"() + br i1 %c, label %truebb, label %falsebb +truebb: + %x = extractelement %v, i32 0 + ret i8 %x +falsebb: + ret i8 0 +} From f43ad88ae1adf15cffcb8d4a9e521644315f7a8d Mon Sep 17 00:00:00 2001 From: Luke Lau Date: Wed, 25 Sep 2024 18:50:16 +0800 Subject: [PATCH 145/212] [RISCV] Handle zvfhmin and zvfbfmin promotion to f32 in half arith costs (#108361) Arithmetic half or bfloat ops on zvfhmin and zvfbfmin respectively will be promoted and carried out in f32, so this updates getArithmeticInstrCost to check for this. --- .../Target/RISCV/RISCVTargetTransformInfo.cpp | 35 ++- .../test/Analysis/CostModel/RISCV/arith-fp.ll | 224 +++++++++++------- 2 files changed, 172 insertions(+), 87 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp index 768df71715fa6..3bef01da0a445 100644 --- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp @@ -1908,6 +1908,29 @@ InstructionCost RISCVTTIImpl::getArithmeticInstrCost( return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info, Op2Info, Args, CxtI); + // f16 with zvfhmin and bf16 will be promoted to f32. + // FIXME: nxv32[b]f16 will be custom lowered and split. 
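+  // The promotion below is costed as: fpext each argument to f32, perform
+  // the operation in f32, then fptrunc the result back to f16/bf16.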
+ unsigned ISDOpcode = TLI->InstructionOpcodeToISD(Opcode); + InstructionCost CastCost = 0; + if ((LT.second.getVectorElementType() == MVT::f16 || + LT.second.getVectorElementType() == MVT::bf16) && + TLI->getOperationAction(ISDOpcode, LT.second) == + TargetLoweringBase::LegalizeAction::Promote) { + MVT PromotedVT = TLI->getTypeToPromoteTo(ISDOpcode, LT.second); + Type *PromotedTy = EVT(PromotedVT).getTypeForEVT(Ty->getContext()); + Type *LegalTy = EVT(LT.second).getTypeForEVT(Ty->getContext()); + // Add cost of extending arguments + CastCost += LT.first * Args.size() * + getCastInstrCost(Instruction::FPExt, PromotedTy, LegalTy, + TTI::CastContextHint::None, CostKind); + // Add cost of truncating result + CastCost += + LT.first * getCastInstrCost(Instruction::FPTrunc, LegalTy, PromotedTy, + TTI::CastContextHint::None, CostKind); + // Compute cost of op in promoted type + LT.second = PromotedVT; + } + auto getConstantMatCost = [&](unsigned Operand, TTI::OperandValueInfo OpInfo) -> InstructionCost { if (OpInfo.isUniform() && TLI->canSplatOperand(Opcode, Operand)) @@ -1929,7 +1952,7 @@ InstructionCost RISCVTTIImpl::getArithmeticInstrCost( ConstantMatCost += getConstantMatCost(1, Op2Info); unsigned Op; - switch (TLI->InstructionOpcodeToISD(Opcode)) { + switch (ISDOpcode) { case ISD::ADD: case ISD::SUB: Op = RISCV::VADD_VV; @@ -1959,11 +1982,9 @@ InstructionCost RISCVTTIImpl::getArithmeticInstrCost( break; case ISD::FADD: case ISD::FSUB: - // TODO: Address FP16 with VFHMIN Op = RISCV::VFADD_VV; break; case ISD::FMUL: - // TODO: Address FP16 with VFHMIN Op = RISCV::VFMUL_VV; break; case ISD::FDIV: @@ -1975,9 +1996,9 @@ InstructionCost RISCVTTIImpl::getArithmeticInstrCost( default: // Assuming all other instructions have the same cost until a need arises to // differentiate them. - return ConstantMatCost + BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, - Op1Info, Op2Info, - Args, CxtI); + return CastCost + ConstantMatCost + + BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info, Op2Info, + Args, CxtI); } InstructionCost InstrCost = getRISCVInstructionCost(Op, LT.second, CostKind); @@ -1986,7 +2007,7 @@ InstructionCost RISCVTTIImpl::getArithmeticInstrCost( // scalar floating point ops aren't cheaper than their vector equivalents. if (Ty->isFPOrFPVectorTy()) InstrCost *= 2; - return ConstantMatCost + LT.first * InstrCost; + return CastCost + ConstantMatCost + LT.first * InstrCost; } // TODO: Deduplicate from TargetTransformInfoImplCRTPBase. 
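(A worked example, assuming the usual RVV cost-table values: on zvfhmin, an
fadd on <vscale x 4 x half> is promoted to <vscale x 4 x float>, so the total
is two fpexts at cost 2 each, one fptrunc at cost 1, and the f32 vfadd at cost
2 * 2 = 4, i.e. 4 + 1 + 4 = 9, the figure reported for %NXV4F16 under ZVFHMIN
in the updated arith-fp.ll below.)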
diff --git a/llvm/test/Analysis/CostModel/RISCV/arith-fp.ll b/llvm/test/Analysis/CostModel/RISCV/arith-fp.ll index b96fdb0109829..b3e66ccc705f8 100644 --- a/llvm/test/Analysis/CostModel/RISCV/arith-fp.ll +++ b/llvm/test/Analysis/CostModel/RISCV/arith-fp.ll @@ -14,11 +14,11 @@ define void @fadd() { ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4BF16 = fadd <4 x bfloat> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8BF16 = fadd <8 x bfloat> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16BF16 = fadd <16 x bfloat> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV1BF16 = fadd undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV2BF16 = fadd undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV4BF16 = fadd undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV8BF16 = fadd undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV16BF16 = fadd undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %NXV1BF16 = fadd undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %NXV2BF16 = fadd undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %NXV4BF16 = fadd undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %NXV8BF16 = fadd undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %NXV16BF16 = fadd undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F32 = fadd <1 x float> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F32 = fadd <2 x float> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = fadd <4 x float> undef, undef @@ -81,21 +81,37 @@ define void @fadd() { } define void @fadd_f16() { -; CHECK-LABEL: 'fadd_f16' -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F16 = fadd half undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F16 = fadd <1 x half> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F16 = fadd <2 x half> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F16 = fadd <4 x half> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F16 = fadd <8 x half> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F16 = fadd <16 x half> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32F16 = fadd <32 x half> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV1F16 = fadd undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV2F16 = fadd undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV4F16 = fadd undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV8F16 = fadd undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV16F16 = fadd undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV32F16 = fadd undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for 
instruction: ret void +; ZVFH-LABEL: 'fadd_f16' +; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F16 = fadd half undef, undef +; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F16 = fadd <1 x half> undef, undef +; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F16 = fadd <2 x half> undef, undef +; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F16 = fadd <4 x half> undef, undef +; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F16 = fadd <8 x half> undef, undef +; ZVFH-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F16 = fadd <16 x half> undef, undef +; ZVFH-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32F16 = fadd <32 x half> undef, undef +; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV1F16 = fadd undef, undef +; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV2F16 = fadd undef, undef +; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV4F16 = fadd undef, undef +; ZVFH-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV8F16 = fadd undef, undef +; ZVFH-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV16F16 = fadd undef, undef +; ZVFH-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV32F16 = fadd undef, undef +; ZVFH-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; ZVFHMIN-LABEL: 'fadd_f16' +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F16 = fadd half undef, undef +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V1F16 = fadd <1 x half> undef, undef +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F16 = fadd <2 x half> undef, undef +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4F16 = fadd <4 x half> undef, undef +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8F16 = fadd <8 x half> undef, undef +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16F16 = fadd <16 x half> undef, undef +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V32F16 = fadd <32 x half> undef, undef +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %NXV1F16 = fadd undef, undef +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %NXV2F16 = fadd undef, undef +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %NXV4F16 = fadd undef, undef +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %NXV8F16 = fadd undef, undef +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %NXV16F16 = fadd undef, undef +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV32F16 = fadd undef, undef +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %F16 = fadd half undef, undef @@ -126,11 +142,11 @@ define void @fsub() { ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4BF16 = fsub <4 x bfloat> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8BF16 = fsub <8 x bfloat> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16BF16 = fsub <16 x bfloat> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV1BF16 = fsub undef, 
undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV2BF16 = fsub undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV4BF16 = fsub undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV8BF16 = fsub undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV16BF16 = fsub undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %NXV1BF16 = fsub undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %NXV2BF16 = fsub undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %NXV4BF16 = fsub undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %NXV8BF16 = fsub undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %NXV16BF16 = fsub undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F32 = fsub <1 x float> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F32 = fsub <2 x float> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = fsub <4 x float> undef, undef @@ -193,21 +209,37 @@ define void @fsub() { } define void @fsub_f16() { -; CHECK-LABEL: 'fsub_f16' -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F16 = fsub half undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F16 = fsub <1 x half> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F16 = fsub <2 x half> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F16 = fsub <4 x half> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F16 = fsub <8 x half> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F16 = fsub <16 x half> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32F16 = fsub <32 x half> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV1F16 = fsub undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV2F16 = fsub undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV4F16 = fsub undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV8F16 = fsub undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV16F16 = fsub undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV32F16 = fsub undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; ZVFH-LABEL: 'fsub_f16' +; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F16 = fsub half undef, undef +; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F16 = fsub <1 x half> undef, undef +; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F16 = fsub <2 x half> undef, undef +; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F16 = fsub <4 x half> undef, undef +; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F16 = fsub <8 x half> undef, undef +; ZVFH-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F16 = fsub <16 x half> undef, undef +; ZVFH-NEXT: Cost Model: Found an 
estimated cost of 8 for instruction: %V32F16 = fsub <32 x half> undef, undef +; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV1F16 = fsub undef, undef +; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV2F16 = fsub undef, undef +; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV4F16 = fsub undef, undef +; ZVFH-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV8F16 = fsub undef, undef +; ZVFH-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV16F16 = fsub undef, undef +; ZVFH-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV32F16 = fsub undef, undef +; ZVFH-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; ZVFHMIN-LABEL: 'fsub_f16' +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F16 = fsub half undef, undef +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V1F16 = fsub <1 x half> undef, undef +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F16 = fsub <2 x half> undef, undef +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4F16 = fsub <4 x half> undef, undef +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8F16 = fsub <8 x half> undef, undef +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16F16 = fsub <16 x half> undef, undef +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V32F16 = fsub <32 x half> undef, undef +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %NXV1F16 = fsub undef, undef +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %NXV2F16 = fsub undef, undef +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %NXV4F16 = fsub undef, undef +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %NXV8F16 = fsub undef, undef +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %NXV16F16 = fsub undef, undef +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV32F16 = fsub undef, undef +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %F16 = fsub half undef, undef @@ -238,11 +270,11 @@ define void @fmul() { ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4BF16 = fmul <4 x bfloat> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8BF16 = fmul <8 x bfloat> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16BF16 = fmul <16 x bfloat> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV1BF16 = fmul undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV2BF16 = fmul undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV4BF16 = fmul undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV8BF16 = fmul undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV16BF16 = fmul undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %NXV1BF16 = fmul undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %NXV2BF16 = fmul undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %NXV4BF16 = fmul undef, undef +; 
CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %NXV8BF16 = fmul undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %NXV16BF16 = fmul undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F32 = fmul <1 x float> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F32 = fmul <2 x float> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = fmul <4 x float> undef, undef @@ -305,21 +337,37 @@ define void @fmul() { } define void @fmul_f16() { -; CHECK-LABEL: 'fmul_f16' -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F16 = fmul half undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F16 = fmul <1 x half> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F16 = fmul <2 x half> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F16 = fmul <4 x half> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F16 = fmul <8 x half> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F16 = fmul <16 x half> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32F16 = fmul <32 x half> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV1F16 = fmul undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV2F16 = fmul undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV4F16 = fmul undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV8F16 = fmul undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV16F16 = fmul undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV32F16 = fmul undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; ZVFH-LABEL: 'fmul_f16' +; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F16 = fmul half undef, undef +; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F16 = fmul <1 x half> undef, undef +; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F16 = fmul <2 x half> undef, undef +; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F16 = fmul <4 x half> undef, undef +; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F16 = fmul <8 x half> undef, undef +; ZVFH-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F16 = fmul <16 x half> undef, undef +; ZVFH-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32F16 = fmul <32 x half> undef, undef +; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV1F16 = fmul undef, undef +; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV2F16 = fmul undef, undef +; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV4F16 = fmul undef, undef +; ZVFH-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV8F16 = fmul undef, undef +; ZVFH-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV16F16 = fmul undef, undef +; ZVFH-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV32F16 = fmul undef, undef +; ZVFH-NEXT: Cost Model: Found an estimated 
cost of 0 for instruction: ret void +; +; ZVFHMIN-LABEL: 'fmul_f16' +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F16 = fmul half undef, undef +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V1F16 = fmul <1 x half> undef, undef +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F16 = fmul <2 x half> undef, undef +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4F16 = fmul <4 x half> undef, undef +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8F16 = fmul <8 x half> undef, undef +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16F16 = fmul <16 x half> undef, undef +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V32F16 = fmul <32 x half> undef, undef +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %NXV1F16 = fmul undef, undef +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %NXV2F16 = fmul undef, undef +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %NXV4F16 = fmul undef, undef +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %NXV8F16 = fmul undef, undef +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %NXV16F16 = fmul undef, undef +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV32F16 = fmul undef, undef +; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %F16 = fmul half undef, undef @@ -350,11 +398,11 @@ define void @fdiv() { ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4BF16 = fdiv <4 x bfloat> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8BF16 = fdiv <8 x bfloat> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16BF16 = fdiv <16 x bfloat> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV1BF16 = fdiv undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV2BF16 = fdiv undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV4BF16 = fdiv undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV8BF16 = fdiv undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV16BF16 = fdiv undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %NXV1BF16 = fdiv undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %NXV2BF16 = fdiv undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %NXV4BF16 = fdiv undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %NXV8BF16 = fdiv undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %NXV16BF16 = fdiv undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F32 = fdiv <1 x float> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F32 = fdiv <2 x float> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = fdiv <4 x float> undef, undef @@ -417,21 +465,37 @@ define void @fdiv() { } define void @fdiv_f16() { -; CHECK-LABEL: 'fdiv_f16' -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F16 = fdiv half undef, 
undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F16 = fdiv <1 x half> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F16 = fdiv <2 x half> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F16 = fdiv <4 x half> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F16 = fdiv <8 x half> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F16 = fdiv <16 x half> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32F16 = fdiv <32 x half> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV1F16 = fdiv <vscale x 1 x half> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV2F16 = fdiv <vscale x 2 x half> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV4F16 = fdiv <vscale x 4 x half> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV8F16 = fdiv <vscale x 8 x half> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV16F16 = fdiv <vscale x 16 x half> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV32F16 = fdiv <vscale x 32 x half> undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+; ZVFH-LABEL: 'fdiv_f16'
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F16 = fdiv half undef, undef
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F16 = fdiv <1 x half> undef, undef
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F16 = fdiv <2 x half> undef, undef
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F16 = fdiv <4 x half> undef, undef
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F16 = fdiv <8 x half> undef, undef
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F16 = fdiv <16 x half> undef, undef
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32F16 = fdiv <32 x half> undef, undef
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV1F16 = fdiv <vscale x 1 x half> undef, undef
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV2F16 = fdiv <vscale x 2 x half> undef, undef
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV4F16 = fdiv <vscale x 4 x half> undef, undef
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV8F16 = fdiv <vscale x 8 x half> undef, undef
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV16F16 = fdiv <vscale x 16 x half> undef, undef
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV32F16 = fdiv <vscale x 32 x half> undef, undef
+; ZVFH-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; ZVFHMIN-LABEL: 'fdiv_f16'
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F16 = fdiv half undef, undef
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V1F16 = fdiv <1 x half> undef, undef
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F16 = fdiv <2 x half> undef, undef
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4F16 = fdiv <4 x half> undef, undef
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8F16 = fdiv <8 x half> undef, undef
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16F16 = fdiv <16 x half> undef, undef
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V32F16 = fdiv <32 x half> undef, undef
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %NXV1F16 = fdiv <vscale x 1 x half> undef, undef
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %NXV2F16 = fdiv <vscale x 2 x half> undef, undef
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %NXV4F16 = fdiv <vscale x 4 x half> undef, undef
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %NXV8F16 = fdiv <vscale x 8 x half> undef, undef
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %NXV16F16 = fdiv <vscale x 16 x half> undef, undef
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV32F16 = fdiv <vscale x 32 x half> undef, undef
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
  %F16 = fdiv half undef, undef

From 4f951503b9b6519906bfe4608bf151057a210b22 Mon Sep 17 00:00:00 2001
From: sstipano <146831748+sstipano@users.noreply.github.com>
Date: Wed, 25 Sep 2024 13:02:29 +0200
Subject: [PATCH 146/212] Reland "[AMDGPU][GlobalIsel] Use isRegisterClassType
 for G_FREEZE and G_IMPLICIT_DEF (#101331)" (#109958)

S192 type was missing from AllScalarTypes.
---
 .../lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp |    9 +-
 .../GlobalISel/inst-select-unmerge-values.mir |   18 +-
 .../AMDGPU/GlobalISel/legalize-freeze.mir     |   30 +-
 .../GlobalISel/legalize-implicit-def.mir      |   28 +-
 .../GlobalISel/legalize-insert-vector-elt.mir |   14 +-
 .../AMDGPU/GlobalISel/legalize-phi.mir        |  152 +-
 .../AMDGPU/GlobalISel/regbankselect.mir       |   19 -
 llvm/test/CodeGen/AMDGPU/freeze.ll            | 1856 +++++++++++++++++
 8 files changed, 1969 insertions(+), 157 deletions(-)
 create mode 100644 llvm/test/CodeGen/AMDGPU/freeze.ll

diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index e657f668cc656..271c8d45fd4a2 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -289,9 +289,11 @@ static const LLT F64 = LLT::float64();
 static const LLT S96 = LLT::scalar(96);
 static const LLT S128 = LLT::scalar(128);
 static const LLT S160 = LLT::scalar(160);
+static const LLT S192 = LLT::scalar(192);
 static const LLT S224 = LLT::scalar(224);
 static const LLT S256 = LLT::scalar(256);
 static const LLT S512 = LLT::scalar(512);
+static const LLT S1024 = LLT::scalar(1024);
 static const LLT MaxScalar = LLT::scalar(MaxRegisterSize);

 static const LLT V2S8 = LLT::fixed_vector(2, 8);
@@ -332,8 +334,8 @@ static const LLT V16S64 = LLT::fixed_vector(16, 64);
 static const LLT V2S128 = LLT::fixed_vector(2, 128);
 static const LLT V4S128 = LLT::fixed_vector(4, 128);

-static std::initializer_list<LLT> AllScalarTypes = {S32, S64, S96, S128,
-                                                    S160, S224, S256, S512};
+static std::initializer_list<LLT> AllScalarTypes = {
+    S32, S64, S96, S128, S160, S192, S224, S256, S512, S1024};

 static std::initializer_list<LLT> AllS16Vectors{
     V2S16, V4S16, V6S16, V8S16, V10S16, V12S16, V16S16, V2S128, V4S128};
@@ -889,10 +891,11 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
       .clampScalar(0, S16, S64);

   getActionDefinitionsBuilder({G_IMPLICIT_DEF, G_FREEZE})
-      .legalIf(isRegisterType(0))
+      .legalIf(isRegisterClassType(0))
       // s1 and s16 are special cases because they have legal operations on
       // them, but don't really occupy registers in the normal way.
.legalFor({S1, S16}) + .clampNumElements(0, V16S32, V32S32) .moreElementsIf(isSmallOddVector(0), oneMoreElement(0)) .clampScalarOrElt(0, S32, MaxScalar) .widenScalarToNextPow2(0, 32) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-unmerge-values.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-unmerge-values.mir index bec5f646b7839..837f65d4bdec6 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-unmerge-values.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-unmerge-values.mir @@ -171,11 +171,9 @@ body: | ; GCN-LABEL: name: test_unmerge_values_s_s64_s_s64_s64_s_s192 ; GCN: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[DEF:%[0-9]+]]:sgpr_192 = IMPLICIT_DEF - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY [[DEF]].sub0_sub1 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY [[DEF]].sub2_sub3 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:sreg_64 = COPY [[DEF]].sub4_sub5 - ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]], implicit [[COPY1]], implicit [[COPY2]] + ; GCN-NEXT: [[DEF:%[0-9]+]]:sgpr(s192) = G_IMPLICIT_DEF + ; GCN-NEXT: [[UV:%[0-9]+]]:sgpr(s64), [[UV1:%[0-9]+]]:sgpr(s64), [[UV2:%[0-9]+]]:sgpr(s64) = G_UNMERGE_VALUES [[DEF]](s192) + ; GCN-NEXT: S_ENDPGM 0, implicit [[UV]](s64), implicit [[UV1]](s64), implicit [[UV2]](s64) %0:sgpr(s192) = G_IMPLICIT_DEF %1:sgpr(s64), %2:sgpr(s64), %3:sgpr(s64) = G_UNMERGE_VALUES %0 S_ENDPGM 0, implicit %1, implicit %2, implicit %3 @@ -294,11 +292,11 @@ body: | ; GCN-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:sgpr_384(<12 x s32>) = G_CONCAT_VECTORS [[COPY]](<3 x s32>), [[COPY1]](<3 x s32>), [[COPY2]](<3 x s32>), [[COPY3]](<3 x s32>) ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_96(<3 x s32>) = COPY [[CONCAT_VECTORS]].sub0_sub1_sub2(<12 x s32>) ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_96(<3 x s32>) = COPY [[CONCAT_VECTORS]].sub3_sub4_sub5(<12 x s32>) - ; GCN-NEXT: [[UV:%[0-9]+]]:sgpr_96(<3 x s32>), [[UV1:%[0-9]+]]:sgpr_96(<3 x s32>), [[UV2:%[0-9]+]]:sgpr_96(<3 x s32>), [[UV3:%[0-9]+]]:sgpr_96(<3 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s32>) - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2 = COPY [[UV]](<3 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5_sgpr6 = COPY [[UV1]](<3 x s32>) - ; GCN-NEXT: $sgpr8_sgpr9_sgpr10 = COPY [[UV2]](<3 x s32>) - ; GCN-NEXT: $sgpr12_sgpr13_sgpr14 = COPY [[UV3]](<3 x s32>) + ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_96(<3 x s32>), [[COPY5:%[0-9]+]]:sgpr_96(<3 x s32>), [[UV:%[0-9]+]]:sgpr_96(<3 x s32>), [[UV1:%[0-9]+]]:sgpr_96(<3 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s32>) + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2 = COPY [[COPY4]](<3 x s32>) + ; GCN-NEXT: $sgpr4_sgpr5_sgpr6 = COPY [[COPY5]](<3 x s32>) + ; GCN-NEXT: $sgpr8_sgpr9_sgpr10 = COPY [[UV]](<3 x s32>) + ; GCN-NEXT: $sgpr12_sgpr13_sgpr14 = COPY [[UV1]](<3 x s32>) %0:sgpr(<3 x s32>) = COPY $sgpr0_sgpr1_sgpr2 %1:sgpr(<3 x s32>) = COPY $sgpr4_sgpr5_sgpr6 %2:sgpr(<3 x s32>) = COPY $sgpr8_sgpr9_sgpr10 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-freeze.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-freeze.mir index c06df6312c9c5..b08f850b5b2b1 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-freeze.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-freeze.mir @@ -171,12 +171,8 @@ body: | ; CHECK-LABEL: name: test_freeze_s448 ; CHECK: [[COPY:%[0-9]+]]:_(s512) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s448) = G_TRUNC [[COPY]](s512) - ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s448) = G_FREEZE [[TRUNC]] - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), 
[[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64), [[UV6:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[FREEZE]](s448) - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s512) = G_MERGE_VALUES [[UV]](s64), [[UV1]](s64), [[UV2]](s64), [[UV3]](s64), [[UV4]](s64), [[UV5]](s64), [[UV6]](s64), [[DEF]](s64) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[MV]](s512) + ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s512) = G_FREEZE [[COPY]] + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[FREEZE]](s512) %0:_(s512) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 %1:_(s448) = G_TRUNC %0 %2:_(s448) = G_FREEZE %1 @@ -399,14 +395,12 @@ body: | bb.0: ; CHECK-LABEL: name: test_freeze_v33s32 - ; CHECK: [[DEF:%[0-9]+]]:_(<16 x s32>) = G_IMPLICIT_DEF + ; CHECK: [[DEF:%[0-9]+]]:_(<32 x s32>) = G_IMPLICIT_DEF ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(<16 x s32>) = G_FREEZE [[DEF]] - ; CHECK-NEXT: [[FREEZE1:%[0-9]+]]:_(<16 x s32>) = G_FREEZE [[DEF]] - ; CHECK-NEXT: [[FREEZE2:%[0-9]+]]:_(s32) = G_FREEZE [[DEF1]] - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FREEZE]](<16 x s32>) - ; CHECK-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FREEZE1]](<16 x s32>) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<33 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32), [[UV4]](s32), [[UV5]](s32), [[UV6]](s32), [[UV7]](s32), [[UV8]](s32), [[UV9]](s32), [[UV10]](s32), [[UV11]](s32), [[UV12]](s32), [[UV13]](s32), [[UV14]](s32), [[UV15]](s32), [[UV16]](s32), [[UV17]](s32), [[UV18]](s32), [[UV19]](s32), [[UV20]](s32), [[UV21]](s32), [[UV22]](s32), [[UV23]](s32), [[UV24]](s32), [[UV25]](s32), [[UV26]](s32), [[UV27]](s32), [[UV28]](s32), [[UV29]](s32), [[UV30]](s32), [[UV31]](s32), [[FREEZE2]](s32) + ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(<32 x s32>) = G_FREEZE [[DEF]] + ; CHECK-NEXT: [[FREEZE1:%[0-9]+]]:_(s32) = G_FREEZE [[DEF1]] + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), 
[[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FREEZE]](<32 x s32>) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<33 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32), [[UV4]](s32), [[UV5]](s32), [[UV6]](s32), [[UV7]](s32), [[UV8]](s32), [[UV9]](s32), [[UV10]](s32), [[UV11]](s32), [[UV12]](s32), [[UV13]](s32), [[UV14]](s32), [[UV15]](s32), [[UV16]](s32), [[UV17]](s32), [[UV18]](s32), [[UV19]](s32), [[UV20]](s32), [[UV21]](s32), [[UV22]](s32), [[UV23]](s32), [[UV24]](s32), [[UV25]](s32), [[UV26]](s32), [[UV27]](s32), [[UV28]](s32), [[UV29]](s32), [[UV30]](s32), [[UV31]](s32), [[FREEZE1]](s32) ; CHECK-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<33 x s32>) %0:_(<33 x s32>) = G_IMPLICIT_DEF %1:_(<33 x s32>) = G_FREEZE %0 @@ -419,12 +413,10 @@ body: | bb.0: ; CHECK-LABEL: name: test_freeze_v64s32 - ; CHECK: [[DEF:%[0-9]+]]:_(<16 x s32>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(<16 x s32>) = G_FREEZE [[DEF]] - ; CHECK-NEXT: [[FREEZE1:%[0-9]+]]:_(<16 x s32>) = G_FREEZE [[DEF]] - ; CHECK-NEXT: [[FREEZE2:%[0-9]+]]:_(<16 x s32>) = G_FREEZE [[DEF]] - ; CHECK-NEXT: [[FREEZE3:%[0-9]+]]:_(<16 x s32>) = G_FREEZE [[DEF]] - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<64 x s32>) = G_CONCAT_VECTORS [[FREEZE]](<16 x s32>), [[FREEZE1]](<16 x s32>), [[FREEZE2]](<16 x s32>), [[FREEZE3]](<16 x s32>) + ; CHECK: [[DEF:%[0-9]+]]:_(<32 x s32>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(<32 x s32>) = G_FREEZE [[DEF]] + ; CHECK-NEXT: [[FREEZE1:%[0-9]+]]:_(<32 x s32>) = G_FREEZE [[DEF]] + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<64 x s32>) = G_CONCAT_VECTORS [[FREEZE]](<32 x s32>), [[FREEZE1]](<32 x s32>) ; CHECK-NEXT: S_NOP 0, implicit [[CONCAT_VECTORS]](<64 x s32>) %0:_(<64 x s32>) = G_IMPLICIT_DEF %1:_(<64 x s32>) = G_FREEZE %0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-implicit-def.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-implicit-def.mir index b9edfbfa6d0a9..8113ebfa5362e 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-implicit-def.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-implicit-def.mir @@ -135,8 +135,9 @@ body: | bb.0: ; CHECK-LABEL: name: test_implicit_def_s448 - ; CHECK: [[DEF:%[0-9]+]]:_(s448) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[DEF]](s448), 0 + ; CHECK: [[DEF:%[0-9]+]]:_(s512) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s448) = G_TRUNC [[DEF]](s512) + ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[TRUNC]](s448), 0 ; CHECK-NEXT: $vgpr0 = COPY [[EXTRACT]](s32) %0:_(s448) = G_IMPLICIT_DEF %1:_(s32) = G_EXTRACT %0, 0 @@ -295,18 +296,6 @@ body: | $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %0 ... ---- -name: test_implicit_def_v17s32 -body: | - bb.0: - - ; CHECK-LABEL: name: test_implicit_def_v17s32 - ; CHECK: [[DEF:%[0-9]+]]:_(<17 x s32>) = G_IMPLICIT_DEF - ; CHECK-NEXT: S_NOP 0, implicit [[DEF]](<17 x s32>) - %0:_(<17 x s32>) = G_IMPLICIT_DEF - S_NOP 0, implicit %0 -... 
- --- name: test_implicit_def_v32s32 body: | @@ -328,9 +317,9 @@ body: | ; CHECK-LABEL: name: test_implicit_def_v33s32 ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<16 x s32>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<32 x s32>) = G_IMPLICIT_DEF ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<16 x s32>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<32 x s32>) ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CHECK-NEXT: G_STORE [[UV]](s32), [[COPY]](p1) :: (volatile store (s32), addrspace 1) ; CHECK-NEXT: G_STORE [[DEF1]](s32), [[COPY]](p1) :: (volatile store (s32), addrspace 1) @@ -348,10 +337,9 @@ body: | bb.0: ; CHECK-LABEL: name: test_implicit_def_v64s32 - ; CHECK: [[DEF:%[0-9]+]]:_(<16 x s32>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<64 x s32>) = G_CONCAT_VECTORS [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>) - ; CHECK-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<32 x s32>) = G_CONCAT_VECTORS [[DEF]](<16 x s32>), [[DEF]](<16 x s32>) - ; CHECK-NEXT: S_NOP 0, implicit [[CONCAT_VECTORS]](<64 x s32>), implicit [[CONCAT_VECTORS1]](<32 x s32>) + ; CHECK: [[DEF:%[0-9]+]]:_(<32 x s32>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<64 x s32>) = G_CONCAT_VECTORS [[DEF]](<32 x s32>), [[DEF]](<32 x s32>) + ; CHECK-NEXT: S_NOP 0, implicit [[CONCAT_VECTORS]](<64 x s32>), implicit [[DEF]](<32 x s32>) %0:_(<64 x s32>) = G_IMPLICIT_DEF %1:_(<32 x s32>), %2:_(<32 x s32>) = G_UNMERGE_VALUES %0 S_NOP 0, implicit %0, implicit %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-insert-vector-elt.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-insert-vector-elt.mir index b57dd396ae355..bebbf2a262256 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-insert-vector-elt.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-insert-vector-elt.mir @@ -190,13 +190,11 @@ body: | ; CHECK-LABEL: name: insert_vector_elt_64_65_v64s32 ; CHECK: liveins: $sgpr0_sgpr1, $vgpr0_vgpr1, $vgpr2_vgpr3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<16 x s32>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<32 x s32>) = G_IMPLICIT_DEF ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p1) = COPY $vgpr2_vgpr3 - ; CHECK-NEXT: 
[[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>), [[UV2:%[0-9]+]]:_(<4 x s32>), [[UV3:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[DEF]](<16 x s32>) - ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(<4 x s32>), [[UV5:%[0-9]+]]:_(<4 x s32>), [[UV6:%[0-9]+]]:_(<4 x s32>), [[UV7:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[DEF]](<16 x s32>) - ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(<4 x s32>), [[UV9:%[0-9]+]]:_(<4 x s32>), [[UV10:%[0-9]+]]:_(<4 x s32>), [[UV11:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[DEF]](<16 x s32>) - ; CHECK-NEXT: [[UV12:%[0-9]+]]:_(<4 x s32>), [[UV13:%[0-9]+]]:_(<4 x s32>), [[UV14:%[0-9]+]]:_(<4 x s32>), [[UV15:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[DEF]](<16 x s32>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>), [[UV2:%[0-9]+]]:_(<4 x s32>), [[UV3:%[0-9]+]]:_(<4 x s32>), [[UV4:%[0-9]+]]:_(<4 x s32>), [[UV5:%[0-9]+]]:_(<4 x s32>), [[UV6:%[0-9]+]]:_(<4 x s32>), [[UV7:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[DEF]](<32 x s32>) + ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(<4 x s32>), [[UV9:%[0-9]+]]:_(<4 x s32>), [[UV10:%[0-9]+]]:_(<4 x s32>), [[UV11:%[0-9]+]]:_(<4 x s32>), [[UV12:%[0-9]+]]:_(<4 x s32>), [[UV13:%[0-9]+]]:_(<4 x s32>), [[UV14:%[0-9]+]]:_(<4 x s32>), [[UV15:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[DEF]](<32 x s32>) ; CHECK-NEXT: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) @@ -243,10 +241,8 @@ body: | ; CHECK-NEXT: [[C14:%[0-9]+]]:_(s64) = G_CONSTANT i64 240 ; CHECK-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C14]](s64) ; CHECK-NEXT: G_STORE [[UV15]](<4 x s32>), [[PTR_ADD14]](p1) :: (store (<4 x s32>) into unknown-address + 240, align 4, addrspace 1) - ; CHECK-NEXT: [[UV16:%[0-9]+]]:_(<4 x s32>), [[UV17:%[0-9]+]]:_(<4 x s32>), [[UV18:%[0-9]+]]:_(<4 x s32>), [[UV19:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[DEF]](<16 x s32>) - ; CHECK-NEXT: [[UV20:%[0-9]+]]:_(<4 x s32>), [[UV21:%[0-9]+]]:_(<4 x s32>), [[UV22:%[0-9]+]]:_(<4 x s32>), [[UV23:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[DEF]](<16 x s32>) - ; CHECK-NEXT: [[UV24:%[0-9]+]]:_(<4 x s32>), [[UV25:%[0-9]+]]:_(<4 x s32>), [[UV26:%[0-9]+]]:_(<4 x s32>), [[UV27:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[DEF]](<16 x s32>) - ; CHECK-NEXT: [[UV28:%[0-9]+]]:_(<4 x s32>), [[UV29:%[0-9]+]]:_(<4 x s32>), [[UV30:%[0-9]+]]:_(<4 x s32>), [[UV31:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[DEF]](<16 x s32>) + ; CHECK-NEXT: [[UV16:%[0-9]+]]:_(<4 x s32>), [[UV17:%[0-9]+]]:_(<4 x s32>), [[UV18:%[0-9]+]]:_(<4 x s32>), [[UV19:%[0-9]+]]:_(<4 x s32>), [[UV20:%[0-9]+]]:_(<4 x s32>), [[UV21:%[0-9]+]]:_(<4 x s32>), [[UV22:%[0-9]+]]:_(<4 x s32>), [[UV23:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[DEF]](<32 x s32>) + ; CHECK-NEXT: [[UV24:%[0-9]+]]:_(<4 x s32>), [[UV25:%[0-9]+]]:_(<4 x s32>), [[UV26:%[0-9]+]]:_(<4 x s32>), [[UV27:%[0-9]+]]:_(<4 x s32>), [[UV28:%[0-9]+]]:_(<4 x s32>), [[UV29:%[0-9]+]]:_(<4 x s32>), [[UV30:%[0-9]+]]:_(<4 x s32>), [[UV31:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[DEF]](<32 x s32>) ; CHECK-NEXT: G_STORE [[UV16]](<4 x s32>), [[COPY1]](p1) :: (store (<4 x s32>), align 4, addrspace 1) ; CHECK-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C]](s64) ; CHECK-NEXT: G_STORE [[UV17]](<4 x s32>), [[PTR_ADD15]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 4, addrspace 1) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-phi.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-phi.mir index 
00612d552a104..d82e8328f26ec 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-phi.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-phi.mir @@ -673,88 +673,86 @@ body: | ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<16 x s32>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<32 x s32>) = G_IMPLICIT_DEF ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr4 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[C]] + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<16 x s32>), [[UV1:%[0-9]+]]:_(<16 x s32>) = G_UNMERGE_VALUES [[DEF]](<32 x s32>) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<16 x s32>), [[UV3:%[0-9]+]]:_(<16 x s32>) = G_UNMERGE_VALUES [[DEF]](<32 x s32>) ; CHECK-NEXT: G_BRCOND [[ICMP]](s1), %bb.1 ; CHECK-NEXT: G_BR %bb.2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<16 x s32>) - ; CHECK-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<16 x s32>) - ; CHECK-NEXT: [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32), [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32), [[UV36:%[0-9]+]]:_(s32), [[UV37:%[0-9]+]]:_(s32), [[UV38:%[0-9]+]]:_(s32), [[UV39:%[0-9]+]]:_(s32), [[UV40:%[0-9]+]]:_(s32), [[UV41:%[0-9]+]]:_(s32), [[UV42:%[0-9]+]]:_(s32), [[UV43:%[0-9]+]]:_(s32), [[UV44:%[0-9]+]]:_(s32), [[UV45:%[0-9]+]]:_(s32), [[UV46:%[0-9]+]]:_(s32), [[UV47:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<16 x s32>) - ; CHECK-NEXT: [[UV48:%[0-9]+]]:_(s32), [[UV49:%[0-9]+]]:_(s32), [[UV50:%[0-9]+]]:_(s32), [[UV51:%[0-9]+]]:_(s32), [[UV52:%[0-9]+]]:_(s32), [[UV53:%[0-9]+]]:_(s32), [[UV54:%[0-9]+]]:_(s32), [[UV55:%[0-9]+]]:_(s32), [[UV56:%[0-9]+]]:_(s32), [[UV57:%[0-9]+]]:_(s32), [[UV58:%[0-9]+]]:_(s32), [[UV59:%[0-9]+]]:_(s32), [[UV60:%[0-9]+]]:_(s32), [[UV61:%[0-9]+]]:_(s32), [[UV62:%[0-9]+]]:_(s32), [[UV63:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<16 x s32>) - ; CHECK-NEXT: [[UV64:%[0-9]+]]:_(s32), [[UV65:%[0-9]+]]:_(s32), [[UV66:%[0-9]+]]:_(s32), [[UV67:%[0-9]+]]:_(s32), [[UV68:%[0-9]+]]:_(s32), [[UV69:%[0-9]+]]:_(s32), [[UV70:%[0-9]+]]:_(s32), [[UV71:%[0-9]+]]:_(s32), [[UV72:%[0-9]+]]:_(s32), [[UV73:%[0-9]+]]:_(s32), [[UV74:%[0-9]+]]:_(s32), [[UV75:%[0-9]+]]:_(s32), [[UV76:%[0-9]+]]:_(s32), [[UV77:%[0-9]+]]:_(s32), [[UV78:%[0-9]+]]:_(s32), [[UV79:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<16 x s32>) - ; CHECK-NEXT: [[UV80:%[0-9]+]]:_(s32), [[UV81:%[0-9]+]]:_(s32), [[UV82:%[0-9]+]]:_(s32), [[UV83:%[0-9]+]]:_(s32), [[UV84:%[0-9]+]]:_(s32), [[UV85:%[0-9]+]]:_(s32), [[UV86:%[0-9]+]]:_(s32), [[UV87:%[0-9]+]]:_(s32), [[UV88:%[0-9]+]]:_(s32), 
[[UV89:%[0-9]+]]:_(s32), [[UV90:%[0-9]+]]:_(s32), [[UV91:%[0-9]+]]:_(s32), [[UV92:%[0-9]+]]:_(s32), [[UV93:%[0-9]+]]:_(s32), [[UV94:%[0-9]+]]:_(s32), [[UV95:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<16 x s32>) - ; CHECK-NEXT: [[UV96:%[0-9]+]]:_(s32), [[UV97:%[0-9]+]]:_(s32), [[UV98:%[0-9]+]]:_(s32), [[UV99:%[0-9]+]]:_(s32), [[UV100:%[0-9]+]]:_(s32), [[UV101:%[0-9]+]]:_(s32), [[UV102:%[0-9]+]]:_(s32), [[UV103:%[0-9]+]]:_(s32), [[UV104:%[0-9]+]]:_(s32), [[UV105:%[0-9]+]]:_(s32), [[UV106:%[0-9]+]]:_(s32), [[UV107:%[0-9]+]]:_(s32), [[UV108:%[0-9]+]]:_(s32), [[UV109:%[0-9]+]]:_(s32), [[UV110:%[0-9]+]]:_(s32), [[UV111:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<16 x s32>) - ; CHECK-NEXT: [[UV112:%[0-9]+]]:_(s32), [[UV113:%[0-9]+]]:_(s32), [[UV114:%[0-9]+]]:_(s32), [[UV115:%[0-9]+]]:_(s32), [[UV116:%[0-9]+]]:_(s32), [[UV117:%[0-9]+]]:_(s32), [[UV118:%[0-9]+]]:_(s32), [[UV119:%[0-9]+]]:_(s32), [[UV120:%[0-9]+]]:_(s32), [[UV121:%[0-9]+]]:_(s32), [[UV122:%[0-9]+]]:_(s32), [[UV123:%[0-9]+]]:_(s32), [[UV124:%[0-9]+]]:_(s32), [[UV125:%[0-9]+]]:_(s32), [[UV126:%[0-9]+]]:_(s32), [[UV127:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<16 x s32>) - ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UV]], [[UV64]] - ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UV1]], [[UV65]] - ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[UV2]], [[UV66]] - ; CHECK-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UV3]], [[UV67]] - ; CHECK-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[UV4]], [[UV68]] - ; CHECK-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UV5]], [[UV69]] - ; CHECK-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[UV6]], [[UV70]] - ; CHECK-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UV7]], [[UV71]] - ; CHECK-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[UV8]], [[UV72]] - ; CHECK-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[UV9]], [[UV73]] - ; CHECK-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[UV10]], [[UV74]] - ; CHECK-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UV11]], [[UV75]] - ; CHECK-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[UV12]], [[UV76]] - ; CHECK-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[UV13]], [[UV77]] - ; CHECK-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[UV14]], [[UV78]] - ; CHECK-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UV15]], [[UV79]] - ; CHECK-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[UV16]], [[UV80]] - ; CHECK-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[UV17]], [[UV81]] - ; CHECK-NEXT: [[ADD18:%[0-9]+]]:_(s32) = G_ADD [[UV18]], [[UV82]] - ; CHECK-NEXT: [[ADD19:%[0-9]+]]:_(s32) = G_ADD [[UV19]], [[UV83]] - ; CHECK-NEXT: [[ADD20:%[0-9]+]]:_(s32) = G_ADD [[UV20]], [[UV84]] - ; CHECK-NEXT: [[ADD21:%[0-9]+]]:_(s32) = G_ADD [[UV21]], [[UV85]] - ; CHECK-NEXT: [[ADD22:%[0-9]+]]:_(s32) = G_ADD [[UV22]], [[UV86]] - ; CHECK-NEXT: [[ADD23:%[0-9]+]]:_(s32) = G_ADD [[UV23]], [[UV87]] - ; CHECK-NEXT: [[ADD24:%[0-9]+]]:_(s32) = G_ADD [[UV24]], [[UV88]] - ; CHECK-NEXT: [[ADD25:%[0-9]+]]:_(s32) = G_ADD [[UV25]], [[UV89]] - ; CHECK-NEXT: [[ADD26:%[0-9]+]]:_(s32) = G_ADD [[UV26]], [[UV90]] - ; CHECK-NEXT: [[ADD27:%[0-9]+]]:_(s32) = G_ADD [[UV27]], [[UV91]] - ; CHECK-NEXT: [[ADD28:%[0-9]+]]:_(s32) = G_ADD [[UV28]], [[UV92]] - ; CHECK-NEXT: [[ADD29:%[0-9]+]]:_(s32) = G_ADD [[UV29]], [[UV93]] - ; CHECK-NEXT: [[ADD30:%[0-9]+]]:_(s32) = G_ADD [[UV30]], [[UV94]] - ; CHECK-NEXT: [[ADD31:%[0-9]+]]:_(s32) = G_ADD [[UV31]], [[UV95]] - ; CHECK-NEXT: [[ADD32:%[0-9]+]]:_(s32) = G_ADD [[UV32]], [[UV96]] - ; CHECK-NEXT: [[ADD33:%[0-9]+]]:_(s32) = G_ADD [[UV33]], [[UV97]] - ; CHECK-NEXT: [[ADD34:%[0-9]+]]:_(s32) = G_ADD [[UV34]], [[UV98]] - ; CHECK-NEXT: [[ADD35:%[0-9]+]]:_(s32) = 
G_ADD [[UV35]], [[UV99]] - ; CHECK-NEXT: [[ADD36:%[0-9]+]]:_(s32) = G_ADD [[UV36]], [[UV100]] - ; CHECK-NEXT: [[ADD37:%[0-9]+]]:_(s32) = G_ADD [[UV37]], [[UV101]] - ; CHECK-NEXT: [[ADD38:%[0-9]+]]:_(s32) = G_ADD [[UV38]], [[UV102]] - ; CHECK-NEXT: [[ADD39:%[0-9]+]]:_(s32) = G_ADD [[UV39]], [[UV103]] - ; CHECK-NEXT: [[ADD40:%[0-9]+]]:_(s32) = G_ADD [[UV40]], [[UV104]] - ; CHECK-NEXT: [[ADD41:%[0-9]+]]:_(s32) = G_ADD [[UV41]], [[UV105]] - ; CHECK-NEXT: [[ADD42:%[0-9]+]]:_(s32) = G_ADD [[UV42]], [[UV106]] - ; CHECK-NEXT: [[ADD43:%[0-9]+]]:_(s32) = G_ADD [[UV43]], [[UV107]] - ; CHECK-NEXT: [[ADD44:%[0-9]+]]:_(s32) = G_ADD [[UV44]], [[UV108]] - ; CHECK-NEXT: [[ADD45:%[0-9]+]]:_(s32) = G_ADD [[UV45]], [[UV109]] - ; CHECK-NEXT: [[ADD46:%[0-9]+]]:_(s32) = G_ADD [[UV46]], [[UV110]] - ; CHECK-NEXT: [[ADD47:%[0-9]+]]:_(s32) = G_ADD [[UV47]], [[UV111]] - ; CHECK-NEXT: [[ADD48:%[0-9]+]]:_(s32) = G_ADD [[UV48]], [[UV112]] - ; CHECK-NEXT: [[ADD49:%[0-9]+]]:_(s32) = G_ADD [[UV49]], [[UV113]] - ; CHECK-NEXT: [[ADD50:%[0-9]+]]:_(s32) = G_ADD [[UV50]], [[UV114]] - ; CHECK-NEXT: [[ADD51:%[0-9]+]]:_(s32) = G_ADD [[UV51]], [[UV115]] - ; CHECK-NEXT: [[ADD52:%[0-9]+]]:_(s32) = G_ADD [[UV52]], [[UV116]] - ; CHECK-NEXT: [[ADD53:%[0-9]+]]:_(s32) = G_ADD [[UV53]], [[UV117]] - ; CHECK-NEXT: [[ADD54:%[0-9]+]]:_(s32) = G_ADD [[UV54]], [[UV118]] - ; CHECK-NEXT: [[ADD55:%[0-9]+]]:_(s32) = G_ADD [[UV55]], [[UV119]] - ; CHECK-NEXT: [[ADD56:%[0-9]+]]:_(s32) = G_ADD [[UV56]], [[UV120]] - ; CHECK-NEXT: [[ADD57:%[0-9]+]]:_(s32) = G_ADD [[UV57]], [[UV121]] - ; CHECK-NEXT: [[ADD58:%[0-9]+]]:_(s32) = G_ADD [[UV58]], [[UV122]] - ; CHECK-NEXT: [[ADD59:%[0-9]+]]:_(s32) = G_ADD [[UV59]], [[UV123]] - ; CHECK-NEXT: [[ADD60:%[0-9]+]]:_(s32) = G_ADD [[UV60]], [[UV124]] - ; CHECK-NEXT: [[ADD61:%[0-9]+]]:_(s32) = G_ADD [[UV61]], [[UV125]] - ; CHECK-NEXT: [[ADD62:%[0-9]+]]:_(s32) = G_ADD [[UV62]], [[UV126]] - ; CHECK-NEXT: [[ADD63:%[0-9]+]]:_(s32) = G_ADD [[UV63]], [[UV127]] + ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32), [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32), [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<32 x s32>) + ; CHECK-NEXT: [[UV36:%[0-9]+]]:_(s32), [[UV37:%[0-9]+]]:_(s32), [[UV38:%[0-9]+]]:_(s32), [[UV39:%[0-9]+]]:_(s32), [[UV40:%[0-9]+]]:_(s32), [[UV41:%[0-9]+]]:_(s32), [[UV42:%[0-9]+]]:_(s32), [[UV43:%[0-9]+]]:_(s32), [[UV44:%[0-9]+]]:_(s32), [[UV45:%[0-9]+]]:_(s32), [[UV46:%[0-9]+]]:_(s32), [[UV47:%[0-9]+]]:_(s32), [[UV48:%[0-9]+]]:_(s32), [[UV49:%[0-9]+]]:_(s32), [[UV50:%[0-9]+]]:_(s32), [[UV51:%[0-9]+]]:_(s32), [[UV52:%[0-9]+]]:_(s32), [[UV53:%[0-9]+]]:_(s32), [[UV54:%[0-9]+]]:_(s32), [[UV55:%[0-9]+]]:_(s32), [[UV56:%[0-9]+]]:_(s32), [[UV57:%[0-9]+]]:_(s32), [[UV58:%[0-9]+]]:_(s32), [[UV59:%[0-9]+]]:_(s32), [[UV60:%[0-9]+]]:_(s32), [[UV61:%[0-9]+]]:_(s32), [[UV62:%[0-9]+]]:_(s32), [[UV63:%[0-9]+]]:_(s32), [[UV64:%[0-9]+]]:_(s32), 
[[UV65:%[0-9]+]]:_(s32), [[UV66:%[0-9]+]]:_(s32), [[UV67:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<32 x s32>) + ; CHECK-NEXT: [[UV68:%[0-9]+]]:_(s32), [[UV69:%[0-9]+]]:_(s32), [[UV70:%[0-9]+]]:_(s32), [[UV71:%[0-9]+]]:_(s32), [[UV72:%[0-9]+]]:_(s32), [[UV73:%[0-9]+]]:_(s32), [[UV74:%[0-9]+]]:_(s32), [[UV75:%[0-9]+]]:_(s32), [[UV76:%[0-9]+]]:_(s32), [[UV77:%[0-9]+]]:_(s32), [[UV78:%[0-9]+]]:_(s32), [[UV79:%[0-9]+]]:_(s32), [[UV80:%[0-9]+]]:_(s32), [[UV81:%[0-9]+]]:_(s32), [[UV82:%[0-9]+]]:_(s32), [[UV83:%[0-9]+]]:_(s32), [[UV84:%[0-9]+]]:_(s32), [[UV85:%[0-9]+]]:_(s32), [[UV86:%[0-9]+]]:_(s32), [[UV87:%[0-9]+]]:_(s32), [[UV88:%[0-9]+]]:_(s32), [[UV89:%[0-9]+]]:_(s32), [[UV90:%[0-9]+]]:_(s32), [[UV91:%[0-9]+]]:_(s32), [[UV92:%[0-9]+]]:_(s32), [[UV93:%[0-9]+]]:_(s32), [[UV94:%[0-9]+]]:_(s32), [[UV95:%[0-9]+]]:_(s32), [[UV96:%[0-9]+]]:_(s32), [[UV97:%[0-9]+]]:_(s32), [[UV98:%[0-9]+]]:_(s32), [[UV99:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<32 x s32>) + ; CHECK-NEXT: [[UV100:%[0-9]+]]:_(s32), [[UV101:%[0-9]+]]:_(s32), [[UV102:%[0-9]+]]:_(s32), [[UV103:%[0-9]+]]:_(s32), [[UV104:%[0-9]+]]:_(s32), [[UV105:%[0-9]+]]:_(s32), [[UV106:%[0-9]+]]:_(s32), [[UV107:%[0-9]+]]:_(s32), [[UV108:%[0-9]+]]:_(s32), [[UV109:%[0-9]+]]:_(s32), [[UV110:%[0-9]+]]:_(s32), [[UV111:%[0-9]+]]:_(s32), [[UV112:%[0-9]+]]:_(s32), [[UV113:%[0-9]+]]:_(s32), [[UV114:%[0-9]+]]:_(s32), [[UV115:%[0-9]+]]:_(s32), [[UV116:%[0-9]+]]:_(s32), [[UV117:%[0-9]+]]:_(s32), [[UV118:%[0-9]+]]:_(s32), [[UV119:%[0-9]+]]:_(s32), [[UV120:%[0-9]+]]:_(s32), [[UV121:%[0-9]+]]:_(s32), [[UV122:%[0-9]+]]:_(s32), [[UV123:%[0-9]+]]:_(s32), [[UV124:%[0-9]+]]:_(s32), [[UV125:%[0-9]+]]:_(s32), [[UV126:%[0-9]+]]:_(s32), [[UV127:%[0-9]+]]:_(s32), [[UV128:%[0-9]+]]:_(s32), [[UV129:%[0-9]+]]:_(s32), [[UV130:%[0-9]+]]:_(s32), [[UV131:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<32 x s32>) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UV4]], [[UV68]] + ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UV5]], [[UV69]] + ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[UV6]], [[UV70]] + ; CHECK-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UV7]], [[UV71]] + ; CHECK-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[UV8]], [[UV72]] + ; CHECK-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UV9]], [[UV73]] + ; CHECK-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[UV10]], [[UV74]] + ; CHECK-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UV11]], [[UV75]] + ; CHECK-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[UV12]], [[UV76]] + ; CHECK-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[UV13]], [[UV77]] + ; CHECK-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[UV14]], [[UV78]] + ; CHECK-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UV15]], [[UV79]] + ; CHECK-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[UV16]], [[UV80]] + ; CHECK-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[UV17]], [[UV81]] + ; CHECK-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[UV18]], [[UV82]] + ; CHECK-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UV19]], [[UV83]] + ; CHECK-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[UV20]], [[UV84]] + ; CHECK-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[UV21]], [[UV85]] + ; CHECK-NEXT: [[ADD18:%[0-9]+]]:_(s32) = G_ADD [[UV22]], [[UV86]] + ; CHECK-NEXT: [[ADD19:%[0-9]+]]:_(s32) = G_ADD [[UV23]], [[UV87]] + ; CHECK-NEXT: [[ADD20:%[0-9]+]]:_(s32) = G_ADD [[UV24]], [[UV88]] + ; CHECK-NEXT: [[ADD21:%[0-9]+]]:_(s32) = G_ADD [[UV25]], [[UV89]] + ; CHECK-NEXT: [[ADD22:%[0-9]+]]:_(s32) = G_ADD [[UV26]], [[UV90]] + ; CHECK-NEXT: [[ADD23:%[0-9]+]]:_(s32) = G_ADD [[UV27]], [[UV91]] + ; CHECK-NEXT: [[ADD24:%[0-9]+]]:_(s32) = G_ADD [[UV28]], [[UV92]] + ; CHECK-NEXT: 
[[ADD25:%[0-9]+]]:_(s32) = G_ADD [[UV29]], [[UV93]] + ; CHECK-NEXT: [[ADD26:%[0-9]+]]:_(s32) = G_ADD [[UV30]], [[UV94]] + ; CHECK-NEXT: [[ADD27:%[0-9]+]]:_(s32) = G_ADD [[UV31]], [[UV95]] + ; CHECK-NEXT: [[ADD28:%[0-9]+]]:_(s32) = G_ADD [[UV32]], [[UV96]] + ; CHECK-NEXT: [[ADD29:%[0-9]+]]:_(s32) = G_ADD [[UV33]], [[UV97]] + ; CHECK-NEXT: [[ADD30:%[0-9]+]]:_(s32) = G_ADD [[UV34]], [[UV98]] + ; CHECK-NEXT: [[ADD31:%[0-9]+]]:_(s32) = G_ADD [[UV35]], [[UV99]] + ; CHECK-NEXT: [[ADD32:%[0-9]+]]:_(s32) = G_ADD [[UV36]], [[UV100]] + ; CHECK-NEXT: [[ADD33:%[0-9]+]]:_(s32) = G_ADD [[UV37]], [[UV101]] + ; CHECK-NEXT: [[ADD34:%[0-9]+]]:_(s32) = G_ADD [[UV38]], [[UV102]] + ; CHECK-NEXT: [[ADD35:%[0-9]+]]:_(s32) = G_ADD [[UV39]], [[UV103]] + ; CHECK-NEXT: [[ADD36:%[0-9]+]]:_(s32) = G_ADD [[UV40]], [[UV104]] + ; CHECK-NEXT: [[ADD37:%[0-9]+]]:_(s32) = G_ADD [[UV41]], [[UV105]] + ; CHECK-NEXT: [[ADD38:%[0-9]+]]:_(s32) = G_ADD [[UV42]], [[UV106]] + ; CHECK-NEXT: [[ADD39:%[0-9]+]]:_(s32) = G_ADD [[UV43]], [[UV107]] + ; CHECK-NEXT: [[ADD40:%[0-9]+]]:_(s32) = G_ADD [[UV44]], [[UV108]] + ; CHECK-NEXT: [[ADD41:%[0-9]+]]:_(s32) = G_ADD [[UV45]], [[UV109]] + ; CHECK-NEXT: [[ADD42:%[0-9]+]]:_(s32) = G_ADD [[UV46]], [[UV110]] + ; CHECK-NEXT: [[ADD43:%[0-9]+]]:_(s32) = G_ADD [[UV47]], [[UV111]] + ; CHECK-NEXT: [[ADD44:%[0-9]+]]:_(s32) = G_ADD [[UV48]], [[UV112]] + ; CHECK-NEXT: [[ADD45:%[0-9]+]]:_(s32) = G_ADD [[UV49]], [[UV113]] + ; CHECK-NEXT: [[ADD46:%[0-9]+]]:_(s32) = G_ADD [[UV50]], [[UV114]] + ; CHECK-NEXT: [[ADD47:%[0-9]+]]:_(s32) = G_ADD [[UV51]], [[UV115]] + ; CHECK-NEXT: [[ADD48:%[0-9]+]]:_(s32) = G_ADD [[UV52]], [[UV116]] + ; CHECK-NEXT: [[ADD49:%[0-9]+]]:_(s32) = G_ADD [[UV53]], [[UV117]] + ; CHECK-NEXT: [[ADD50:%[0-9]+]]:_(s32) = G_ADD [[UV54]], [[UV118]] + ; CHECK-NEXT: [[ADD51:%[0-9]+]]:_(s32) = G_ADD [[UV55]], [[UV119]] + ; CHECK-NEXT: [[ADD52:%[0-9]+]]:_(s32) = G_ADD [[UV56]], [[UV120]] + ; CHECK-NEXT: [[ADD53:%[0-9]+]]:_(s32) = G_ADD [[UV57]], [[UV121]] + ; CHECK-NEXT: [[ADD54:%[0-9]+]]:_(s32) = G_ADD [[UV58]], [[UV122]] + ; CHECK-NEXT: [[ADD55:%[0-9]+]]:_(s32) = G_ADD [[UV59]], [[UV123]] + ; CHECK-NEXT: [[ADD56:%[0-9]+]]:_(s32) = G_ADD [[UV60]], [[UV124]] + ; CHECK-NEXT: [[ADD57:%[0-9]+]]:_(s32) = G_ADD [[UV61]], [[UV125]] + ; CHECK-NEXT: [[ADD58:%[0-9]+]]:_(s32) = G_ADD [[UV62]], [[UV126]] + ; CHECK-NEXT: [[ADD59:%[0-9]+]]:_(s32) = G_ADD [[UV63]], [[UV127]] + ; CHECK-NEXT: [[ADD60:%[0-9]+]]:_(s32) = G_ADD [[UV64]], [[UV128]] + ; CHECK-NEXT: [[ADD61:%[0-9]+]]:_(s32) = G_ADD [[UV65]], [[UV129]] + ; CHECK-NEXT: [[ADD62:%[0-9]+]]:_(s32) = G_ADD [[UV66]], [[UV130]] + ; CHECK-NEXT: [[ADD63:%[0-9]+]]:_(s32) = G_ADD [[UV67]], [[UV131]] ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[ADD]](s32), [[ADD1]](s32), [[ADD2]](s32), [[ADD3]](s32), [[ADD4]](s32), [[ADD5]](s32), [[ADD6]](s32), [[ADD7]](s32), [[ADD8]](s32), [[ADD9]](s32), [[ADD10]](s32), [[ADD11]](s32), [[ADD12]](s32), [[ADD13]](s32), [[ADD14]](s32), [[ADD15]](s32) ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[ADD16]](s32), [[ADD17]](s32), [[ADD18]](s32), [[ADD19]](s32), [[ADD20]](s32), [[ADD21]](s32), [[ADD22]](s32), [[ADD23]](s32), [[ADD24]](s32), [[ADD25]](s32), [[ADD26]](s32), [[ADD27]](s32), [[ADD28]](s32), [[ADD29]](s32), [[ADD30]](s32), [[ADD31]](s32) ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[ADD32]](s32), [[ADD33]](s32), [[ADD34]](s32), [[ADD35]](s32), [[ADD36]](s32), [[ADD37]](s32), [[ADD38]](s32), [[ADD39]](s32), [[ADD40]](s32), [[ADD41]](s32), [[ADD42]](s32), 
[[ADD43]](s32), [[ADD44]](s32), [[ADD45]](s32), [[ADD46]](s32), [[ADD47]](s32) @@ -762,10 +760,10 @@ body: | ; CHECK-NEXT: G_BR %bb.2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: - ; CHECK-NEXT: [[PHI:%[0-9]+]]:_(<16 x s32>) = G_PHI [[DEF]](<16 x s32>), %bb.0, [[BUILD_VECTOR]](<16 x s32>), %bb.1 - ; CHECK-NEXT: [[PHI1:%[0-9]+]]:_(<16 x s32>) = G_PHI [[DEF]](<16 x s32>), %bb.0, [[BUILD_VECTOR1]](<16 x s32>), %bb.1 - ; CHECK-NEXT: [[PHI2:%[0-9]+]]:_(<16 x s32>) = G_PHI [[DEF]](<16 x s32>), %bb.0, [[BUILD_VECTOR2]](<16 x s32>), %bb.1 - ; CHECK-NEXT: [[PHI3:%[0-9]+]]:_(<16 x s32>) = G_PHI [[DEF]](<16 x s32>), %bb.0, [[BUILD_VECTOR3]](<16 x s32>), %bb.1 + ; CHECK-NEXT: [[PHI:%[0-9]+]]:_(<16 x s32>) = G_PHI [[UV]](<16 x s32>), %bb.0, [[BUILD_VECTOR]](<16 x s32>), %bb.1 + ; CHECK-NEXT: [[PHI1:%[0-9]+]]:_(<16 x s32>) = G_PHI [[UV1]](<16 x s32>), %bb.0, [[BUILD_VECTOR1]](<16 x s32>), %bb.1 + ; CHECK-NEXT: [[PHI2:%[0-9]+]]:_(<16 x s32>) = G_PHI [[UV2]](<16 x s32>), %bb.0, [[BUILD_VECTOR2]](<16 x s32>), %bb.1 + ; CHECK-NEXT: [[PHI3:%[0-9]+]]:_(<16 x s32>) = G_PHI [[UV3]](<16 x s32>), %bb.0, [[BUILD_VECTOR3]](<16 x s32>), %bb.1 ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<64 x s32>) = G_CONCAT_VECTORS [[PHI]](<16 x s32>), [[PHI1]](<16 x s32>), [[PHI2]](<16 x s32>), [[PHI3]](<16 x s32>) ; CHECK-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[CONCAT_VECTORS]](<64 x s32>) bb.0: diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect.mir index c50187f594901..1565986516860 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect.mir @@ -42,8 +42,6 @@ ret void } - define void @non_power_of_2() { ret void } - define amdgpu_kernel void @load_constant_v4i16_from_8_align8(ptr addrspace(4) %ptr0) { ret void } @@ -186,23 +184,6 @@ body: | %1:_(s32) = G_LOAD %0 :: (load (s32) from %ir.tmp1) ... ---- -name: non_power_of_2 -legalized: true - -body: | - bb.0: - ; CHECK-LABEL: name: non_power_of_2 - ; CHECK: [[DEF:%[0-9]+]]:sgpr(s448) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:sgpr(s32) = G_EXTRACT [[DEF]](s448), 0 - ; CHECK-NEXT: $sgpr0 = COPY [[EXTRACT]](s32) - ; CHECK-NEXT: SI_RETURN_TO_EPILOG $sgpr0 - %0:_(s448) = G_IMPLICIT_DEF - %1:_(s32) = G_EXTRACT %0:_(s448), 0 - $sgpr0 = COPY %1:_(s32) - SI_RETURN_TO_EPILOG $sgpr0 -... 
- --- name: load_constant_v4i16_from_8_align8 legalized: true diff --git a/llvm/test/CodeGen/AMDGPU/freeze.ll b/llvm/test/CodeGen/AMDGPU/freeze.ll new file mode 100644 index 0000000000000..22427ee344d91 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/freeze.ll @@ -0,0 +1,1856 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GFX10,GFX10-SDAG %s +; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GFX10,GFX10-GISEL %s +; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX11,GFX11-SDAG %s +; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX11,GFX11-GISEL %s + +define void @freeze_v2i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { +; GFX10-LABEL: freeze_v2i32: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: global_load_dwordx2 v[0:1], v[0:1], off +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: global_store_dwordx2 v[2:3], v[0:1], off +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: freeze_v2i32: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: global_load_b64 v[0:1], v[0:1], off +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: global_store_b64 v[2:3], v[0:1], off +; GFX11-NEXT: s_setpc_b64 s[30:31] + %a = load <2 x i32>, ptr addrspace(1) %ptra, align 4 + %freeze = freeze <2 x i32> %a + store <2 x i32> %freeze, ptr addrspace(1) %ptrb, align 4 + ret void +} + +define void @freeze_v3i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { +; GFX10-LABEL: freeze_v3i32: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: global_load_dwordx3 v[4:6], v[0:1], off +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: global_store_dwordx3 v[2:3], v[4:6], off +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: freeze_v3i32: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: global_load_b96 v[4:6], v[0:1], off +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: global_store_b96 v[2:3], v[4:6], off +; GFX11-NEXT: s_setpc_b64 s[30:31] + %a = load <3 x i32>, ptr addrspace(1) %ptra, align 4 + %freeze = freeze <3 x i32> %a + store <3 x i32> %freeze, ptr addrspace(1) %ptrb, align 4 + ret void +} + +define void @freeze_v4i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { +; GFX10-LABEL: freeze_v4i32: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: global_load_dwordx4 v[4:7], v[0:1], off +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: global_store_dwordx4 v[2:3], v[4:7], off +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: freeze_v4i32: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: global_load_b128 v[4:7], v[0:1], off +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: global_store_b128 v[2:3], v[4:7], off +; GFX11-NEXT: s_setpc_b64 s[30:31] + %a = load <4 x i32>, ptr addrspace(1) %ptra, align 4 + %freeze = freeze <4 x i32> %a + store <4 x i32> %freeze, ptr addrspace(1) %ptrb, align 4 + ret void +} + +define void @freeze_v5i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { +; GFX10-SDAG-LABEL: freeze_v5i32: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-SDAG-NEXT: s_clause 0x1 +; 
GFX10-SDAG-NEXT: global_load_dword v8, v[0:1], off offset:16 +; GFX10-SDAG-NEXT: global_load_dwordx4 v[4:7], v[0:1], off +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(1) +; GFX10-SDAG-NEXT: global_store_dword v[2:3], v8, off offset:16 +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[4:7], off +; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-GISEL-LABEL: freeze_v5i32: +; GFX10-GISEL: ; %bb.0: +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-GISEL-NEXT: s_clause 0x1 +; GFX10-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off +; GFX10-GISEL-NEXT: global_load_dword v8, v[0:1], off offset:16 +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(1) +; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX10-GISEL-NEXT: global_store_dword v[2:3], v8, off offset:16 +; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-SDAG-LABEL: freeze_v5i32: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: s_clause 0x1 +; GFX11-SDAG-NEXT: global_load_b32 v8, v[0:1], off offset:16 +; GFX11-SDAG-NEXT: global_load_b128 v[4:7], v[0:1], off +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(1) +; GFX11-SDAG-NEXT: global_store_b32 v[2:3], v8, off offset:16 +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[4:7], off +; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-GISEL-LABEL: freeze_v5i32: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-GISEL-NEXT: s_clause 0x1 +; GFX11-GISEL-NEXT: global_load_b128 v[4:7], v[0:1], off +; GFX11-GISEL-NEXT: global_load_b32 v0, v[0:1], off offset:16 +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(1) +; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[4:7], off +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX11-GISEL-NEXT: global_store_b32 v[2:3], v0, off offset:16 +; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] + %a = load <5 x i32>, ptr addrspace(1) %ptra, align 4 + %freeze = freeze <5 x i32> %a + store <5 x i32> %freeze, ptr addrspace(1) %ptrb, align 4 + ret void +} + +define void @freeze_v6i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { +; GFX10-SDAG-LABEL: freeze_v6i32: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-SDAG-NEXT: s_clause 0x1 +; GFX10-SDAG-NEXT: global_load_dwordx2 v[8:9], v[0:1], off offset:16 +; GFX10-SDAG-NEXT: global_load_dwordx4 v[4:7], v[0:1], off +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(1) +; GFX10-SDAG-NEXT: global_store_dwordx2 v[2:3], v[8:9], off offset:16 +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[4:7], off +; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-GISEL-LABEL: freeze_v6i32: +; GFX10-GISEL: ; %bb.0: +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-GISEL-NEXT: s_clause 0x1 +; GFX10-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off +; GFX10-GISEL-NEXT: global_load_dwordx2 v[8:9], v[0:1], off offset:16 +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(1) +; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX10-GISEL-NEXT: global_store_dwordx2 v[2:3], v[8:9], off offset:16 +; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-SDAG-LABEL: freeze_v6i32: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: s_clause 0x1 +; GFX11-SDAG-NEXT: global_load_b64 v[8:9], v[0:1], off offset:16 +; GFX11-SDAG-NEXT: global_load_b128 v[4:7], v[0:1], off +; 
GFX11-SDAG-NEXT: s_waitcnt vmcnt(1) +; GFX11-SDAG-NEXT: global_store_b64 v[2:3], v[8:9], off offset:16 +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[4:7], off +; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-GISEL-LABEL: freeze_v6i32: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-GISEL-NEXT: s_clause 0x1 +; GFX11-GISEL-NEXT: global_load_b128 v[4:7], v[0:1], off +; GFX11-GISEL-NEXT: global_load_b64 v[0:1], v[0:1], off offset:16 +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(1) +; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[4:7], off +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX11-GISEL-NEXT: global_store_b64 v[2:3], v[0:1], off offset:16 +; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] + %a = load <6 x i32>, ptr addrspace(1) %ptra, align 4 + %freeze = freeze <6 x i32> %a + store <6 x i32> %freeze, ptr addrspace(1) %ptrb, align 4 + ret void +} + +define void @freeze_v7i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { +; GFX10-SDAG-LABEL: freeze_v7i32: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-SDAG-NEXT: s_clause 0x1 +; GFX10-SDAG-NEXT: global_load_dwordx3 v[8:10], v[0:1], off offset:16 +; GFX10-SDAG-NEXT: global_load_dwordx4 v[4:7], v[0:1], off +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(1) +; GFX10-SDAG-NEXT: global_store_dwordx3 v[2:3], v[8:10], off offset:16 +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[4:7], off +; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-GISEL-LABEL: freeze_v7i32: +; GFX10-GISEL: ; %bb.0: +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-GISEL-NEXT: s_clause 0x1 +; GFX10-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off +; GFX10-GISEL-NEXT: global_load_dwordx3 v[8:10], v[0:1], off offset:16 +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(1) +; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX10-GISEL-NEXT: global_store_dwordx3 v[2:3], v[8:10], off offset:16 +; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-SDAG-LABEL: freeze_v7i32: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: s_clause 0x1 +; GFX11-SDAG-NEXT: global_load_b96 v[8:10], v[0:1], off offset:16 +; GFX11-SDAG-NEXT: global_load_b128 v[4:7], v[0:1], off +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(1) +; GFX11-SDAG-NEXT: global_store_b96 v[2:3], v[8:10], off offset:16 +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[4:7], off +; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-GISEL-LABEL: freeze_v7i32: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-GISEL-NEXT: s_clause 0x1 +; GFX11-GISEL-NEXT: global_load_b128 v[4:7], v[0:1], off +; GFX11-GISEL-NEXT: global_load_b96 v[8:10], v[0:1], off offset:16 +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(1) +; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[4:7], off +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX11-GISEL-NEXT: global_store_b96 v[2:3], v[8:10], off offset:16 +; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] + %a = load <7 x i32>, ptr addrspace(1) %ptra, align 4 + %freeze = freeze <7 x i32> %a + store <7 x i32> %freeze, ptr addrspace(1) %ptrb, align 4 + ret void +} + +define void @freeze_v8i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { +; GFX10-SDAG-LABEL: freeze_v8i32: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-SDAG-NEXT: s_clause 0x1 +; 
GFX10-SDAG-NEXT: global_load_dwordx4 v[4:7], v[0:1], off offset:16
+; GFX10-SDAG-NEXT: global_load_dwordx4 v[8:11], v[0:1], off
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(1)
+; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[4:7], off offset:16
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[8:11], off
+; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-GISEL-LABEL: freeze_v8i32:
+; GFX10-GISEL: ; %bb.0:
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-GISEL-NEXT: s_clause 0x1
+; GFX10-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off
+; GFX10-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(1)
+; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16
+; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-SDAG-LABEL: freeze_v8i32:
+; GFX11-SDAG: ; %bb.0:
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-NEXT: s_clause 0x1
+; GFX11-SDAG-NEXT: global_load_b128 v[4:7], v[0:1], off offset:16
+; GFX11-SDAG-NEXT: global_load_b128 v[8:11], v[0:1], off
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(1)
+; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[4:7], off offset:16
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[8:11], off
+; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-LABEL: freeze_v8i32:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-NEXT: s_clause 0x1
+; GFX11-GISEL-NEXT: global_load_b128 v[4:7], v[0:1], off
+; GFX11-GISEL-NEXT: global_load_b128 v[8:11], v[0:1], off offset:16
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(1)
+; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[4:7], off
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[8:11], off offset:16
+; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
+ %a = load <8 x i32>, ptr addrspace(1) %ptra, align 4
+ %freeze = freeze <8 x i32> %a
+ store <8 x i32> %freeze, ptr addrspace(1) %ptrb, align 4
+ ret void
+}
+
+define void @freeze_v9i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
+; GFX10-SDAG-LABEL: freeze_v9i32:
+; GFX10-SDAG: ; %bb.0:
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-SDAG-NEXT: s_clause 0x2
+; GFX10-SDAG-NEXT: global_load_dword v12, v[0:1], off offset:32
+; GFX10-SDAG-NEXT: global_load_dwordx4 v[4:7], v[0:1], off
+; GFX10-SDAG-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(2)
+; GFX10-SDAG-NEXT: global_store_dword v[2:3], v12, off offset:32
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(1)
+; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[4:7], off
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16
+; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-GISEL-LABEL: freeze_v9i32:
+; GFX10-GISEL: ; %bb.0:
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-GISEL-NEXT: s_clause 0x2
+; GFX10-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off
+; GFX10-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16
+; GFX10-GISEL-NEXT: global_load_dword v12, v[0:1], off offset:32
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(2)
+; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(1)
+; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-GISEL-NEXT: global_store_dword v[2:3], v12, off offset:32
+; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-SDAG-LABEL: freeze_v9i32:
+; GFX11-SDAG: ; %bb.0:
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-NEXT: s_clause 0x2
+; GFX11-SDAG-NEXT: global_load_b32 v12, v[0:1], off offset:32
+; GFX11-SDAG-NEXT: global_load_b128 v[4:7], v[0:1], off
+; GFX11-SDAG-NEXT: global_load_b128 v[8:11], v[0:1], off offset:16
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(2)
+; GFX11-SDAG-NEXT: global_store_b32 v[2:3], v12, off offset:32
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(1)
+; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[4:7], off
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[8:11], off offset:16
+; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-LABEL: freeze_v9i32:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-NEXT: s_clause 0x2
+; GFX11-GISEL-NEXT: global_load_b128 v[4:7], v[0:1], off
+; GFX11-GISEL-NEXT: global_load_b128 v[8:11], v[0:1], off offset:16
+; GFX11-GISEL-NEXT: global_load_b32 v0, v[0:1], off offset:32
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(2)
+; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[4:7], off
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(1)
+; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[8:11], off offset:16
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GISEL-NEXT: global_store_b32 v[2:3], v0, off offset:32
+; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
+ %a = load <9 x i32>, ptr addrspace(1) %ptra, align 4
+ %freeze = freeze <9 x i32> %a
+ store <9 x i32> %freeze, ptr addrspace(1) %ptrb, align 4
+ ret void
+}
+
+define void @freeze_v10i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
+; GFX10-LABEL: freeze_v10i32:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: s_clause 0x2
+; GFX10-NEXT: global_load_dwordx4 v[4:7], v[0:1], off
+; GFX10-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16
+; GFX10-NEXT: global_load_dwordx2 v[12:13], v[0:1], off offset:32
+; GFX10-NEXT: s_waitcnt vmcnt(2)
+; GFX10-NEXT: global_store_dwordx4 v[2:3], v[4:7], off
+; GFX10-NEXT: s_waitcnt vmcnt(1)
+; GFX10-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: global_store_dwordx2 v[2:3], v[12:13], off offset:32
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: freeze_v10i32:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: s_clause 0x2
+; GFX11-NEXT: global_load_b128 v[4:7], v[0:1], off
+; GFX11-NEXT: global_load_b128 v[8:11], v[0:1], off offset:16
+; GFX11-NEXT: global_load_b64 v[0:1], v[0:1], off offset:32
+; GFX11-NEXT: s_waitcnt vmcnt(2)
+; GFX11-NEXT: global_store_b128 v[2:3], v[4:7], off
+; GFX11-NEXT: s_waitcnt vmcnt(1)
+; GFX11-NEXT: global_store_b128 v[2:3], v[8:11], off offset:16
+; GFX11-NEXT: s_waitcnt vmcnt(0)
+; GFX11-NEXT: global_store_b64 v[2:3], v[0:1], off offset:32
+; GFX11-NEXT: s_setpc_b64 s[30:31]
+ %a = load <10 x i32>, ptr addrspace(1) %ptra, align 4
+ %freeze = freeze <10 x i32> %a
+ store <10 x i32> %freeze, ptr addrspace(1) %ptrb, align 4
+ ret void
+}
+
+define void @freeze_v11i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
+; GFX10-LABEL: freeze_v11i32:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: s_clause 0x2
+; GFX10-NEXT: global_load_dwordx4 v[4:7], v[0:1], off
+; GFX10-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16
+; GFX10-NEXT: global_load_dwordx3 v[12:14], v[0:1], off offset:32
+; GFX10-NEXT: s_waitcnt vmcnt(2)
+; GFX10-NEXT: global_store_dwordx4 v[2:3], v[4:7], off
+; GFX10-NEXT: s_waitcnt vmcnt(1)
+; GFX10-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: global_store_dwordx3 v[2:3], v[12:14], off offset:32
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: freeze_v11i32:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: s_clause 0x2
+; GFX11-NEXT: global_load_b128 v[4:7], v[0:1], off
+; GFX11-NEXT: global_load_b128 v[8:11], v[0:1], off offset:16
+; GFX11-NEXT: global_load_b96 v[12:14], v[0:1], off offset:32
+; GFX11-NEXT: s_waitcnt vmcnt(2)
+; GFX11-NEXT: global_store_b128 v[2:3], v[4:7], off
+; GFX11-NEXT: s_waitcnt vmcnt(1)
+; GFX11-NEXT: global_store_b128 v[2:3], v[8:11], off offset:16
+; GFX11-NEXT: s_waitcnt vmcnt(0)
+; GFX11-NEXT: global_store_b96 v[2:3], v[12:14], off offset:32
+; GFX11-NEXT: s_setpc_b64 s[30:31]
+ %a = load <11 x i32>, ptr addrspace(1) %ptra, align 4
+ %freeze = freeze <11 x i32> %a
+ store <11 x i32> %freeze, ptr addrspace(1) %ptrb, align 4
+ ret void
+}
+
+define void @freeze_v12i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
+; GFX10-LABEL: freeze_v12i32:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: s_clause 0x2
+; GFX10-NEXT: global_load_dwordx4 v[4:7], v[0:1], off
+; GFX10-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16
+; GFX10-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:32
+; GFX10-NEXT: s_waitcnt vmcnt(2)
+; GFX10-NEXT: global_store_dwordx4 v[2:3], v[4:7], off
+; GFX10-NEXT: s_waitcnt vmcnt(1)
+; GFX10-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:32
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: freeze_v12i32:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: s_clause 0x2
+; GFX11-NEXT: global_load_b128 v[4:7], v[0:1], off
+; GFX11-NEXT: global_load_b128 v[8:11], v[0:1], off offset:16
+; GFX11-NEXT: global_load_b128 v[12:15], v[0:1], off offset:32
+; GFX11-NEXT: s_waitcnt vmcnt(2)
+; GFX11-NEXT: global_store_b128 v[2:3], v[4:7], off
+; GFX11-NEXT: s_waitcnt vmcnt(1)
+; GFX11-NEXT: global_store_b128 v[2:3], v[8:11], off offset:16
+; GFX11-NEXT: s_waitcnt vmcnt(0)
+; GFX11-NEXT: global_store_b128 v[2:3], v[12:15], off offset:32
+; GFX11-NEXT: s_setpc_b64 s[30:31]
+ %a = load <12 x i32>, ptr addrspace(1) %ptra, align 4
+ %freeze = freeze <12 x i32> %a
+ store <12 x i32> %freeze, ptr addrspace(1) %ptrb, align 4
+ ret void
+}
+define void @freeze_v13i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
+; GFX10-SDAG-LABEL: freeze_v13i32:
+; GFX10-SDAG: ; %bb.0:
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-SDAG-NEXT: s_clause 0x3
+; GFX10-SDAG-NEXT: global_load_dword v16, v[0:1], off offset:48
+; GFX10-SDAG-NEXT: global_load_dwordx4 v[4:7], v[0:1], off offset:32
+; GFX10-SDAG-NEXT: global_load_dwordx4 v[8:11], v[0:1], off
+; GFX10-SDAG-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:16
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(3)
+; GFX10-SDAG-NEXT: global_store_dword v[2:3], v16, off offset:48
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(2)
+; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[4:7], off offset:32
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(1)
+; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[8:11], off
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:16
+; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-GISEL-LABEL: freeze_v13i32:
+; GFX10-GISEL: ; %bb.0:
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-GISEL-NEXT: s_clause 0x3
+; GFX10-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off
+; GFX10-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16
+; GFX10-GISEL-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:32
+; GFX10-GISEL-NEXT: global_load_dword v16, v[0:1], off offset:48
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(3)
+; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(2)
+; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(1)
+; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:32
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-GISEL-NEXT: global_store_dword v[2:3], v16, off offset:48
+; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-SDAG-LABEL: freeze_v13i32:
+; GFX11-SDAG: ; %bb.0:
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-NEXT: s_clause 0x3
+; GFX11-SDAG-NEXT: global_load_b32 v16, v[0:1], off offset:48
+; GFX11-SDAG-NEXT: global_load_b128 v[4:7], v[0:1], off offset:32
+; GFX11-SDAG-NEXT: global_load_b128 v[8:11], v[0:1], off
+; GFX11-SDAG-NEXT: global_load_b128 v[12:15], v[0:1], off offset:16
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(3)
+; GFX11-SDAG-NEXT: global_store_b32 v[2:3], v16, off offset:48
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(2)
+; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[4:7], off offset:32
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(1)
+; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[8:11], off
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[12:15], off offset:16
+; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-LABEL: freeze_v13i32:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-NEXT: s_clause 0x3
+; GFX11-GISEL-NEXT: global_load_b128 v[4:7], v[0:1], off
+; GFX11-GISEL-NEXT: global_load_b128 v[8:11], v[0:1], off offset:16
+; GFX11-GISEL-NEXT: global_load_b128 v[12:15], v[0:1], off offset:32
+; GFX11-GISEL-NEXT: global_load_b32 v0, v[0:1], off offset:48
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(3)
+; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[4:7], off
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(2)
+; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[8:11], off offset:16
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(1)
+; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[12:15], off offset:32
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GISEL-NEXT: global_store_b32 v[2:3], v0, off offset:48
+; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
+ %a = load <13 x i32>, ptr addrspace(1) %ptra, align 4
+ %freeze = freeze <13 x i32> %a
+ store <13 x i32> %freeze, ptr addrspace(1) %ptrb, align 4
+ ret void
+}
+
+define void @freeze_v14i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
+; GFX10-SDAG-LABEL: freeze_v14i32:
+; GFX10-SDAG: ; %bb.0:
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-SDAG-NEXT: s_clause 0x3
+; GFX10-SDAG-NEXT: global_load_dwordx4 v[4:7], v[0:1], off offset:32
+; GFX10-SDAG-NEXT: global_load_dwordx2 v[16:17], v[0:1], off offset:48
+; GFX10-SDAG-NEXT: global_load_dwordx4 v[8:11], v[0:1], off
+; GFX10-SDAG-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:16
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(3)
+; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[4:7], off offset:32
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(2)
+; GFX10-SDAG-NEXT: global_store_dwordx2 v[2:3], v[16:17], off offset:48
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(1)
+; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[8:11], off
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:16
+; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-GISEL-LABEL: freeze_v14i32:
+; GFX10-GISEL: ; %bb.0:
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-GISEL-NEXT: s_clause 0x3
+; GFX10-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off
+; GFX10-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16
+; GFX10-GISEL-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:32
+; GFX10-GISEL-NEXT: global_load_dwordx2 v[16:17], v[0:1], off offset:48
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(3)
+; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(2)
+; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(1)
+; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:32
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-GISEL-NEXT: global_store_dwordx2 v[2:3], v[16:17], off offset:48
+; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-SDAG-LABEL: freeze_v14i32:
+; GFX11-SDAG: ; %bb.0:
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-NEXT: s_clause 0x3
+; GFX11-SDAG-NEXT: global_load_b128 v[4:7], v[0:1], off offset:32
+; GFX11-SDAG-NEXT: global_load_b64 v[16:17], v[0:1], off offset:48
+; GFX11-SDAG-NEXT: global_load_b128 v[8:11], v[0:1], off
+; GFX11-SDAG-NEXT: global_load_b128 v[12:15], v[0:1], off offset:16
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(3)
+; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[4:7], off offset:32
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(2)
+; GFX11-SDAG-NEXT: global_store_b64 v[2:3], v[16:17], off offset:48
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(1)
+; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[8:11], off
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[12:15], off offset:16
+; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-LABEL: freeze_v14i32:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-NEXT: s_clause 0x3
+; GFX11-GISEL-NEXT: global_load_b128 v[4:7], v[0:1], off
+; GFX11-GISEL-NEXT: global_load_b128 v[8:11], v[0:1], off offset:16
+; GFX11-GISEL-NEXT: global_load_b128 v[12:15], v[0:1], off offset:32
+; GFX11-GISEL-NEXT: global_load_b64 v[0:1], v[0:1], off offset:48
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(3)
+; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[4:7], off
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(2)
+; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[8:11], off offset:16
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(1)
+; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[12:15], off offset:32
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GISEL-NEXT: global_store_b64 v[2:3], v[0:1], off offset:48
+; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
+ %a = load <14 x i32>, ptr addrspace(1) %ptra, align 4
+ %freeze = freeze <14 x i32> %a
+ store <14 x i32> %freeze, ptr addrspace(1) %ptrb, align 4
+ ret void
+}
+
+define void @freeze_v15i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
+; GFX10-SDAG-LABEL: freeze_v15i32:
+; GFX10-SDAG: ; %bb.0:
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-SDAG-NEXT: s_clause 0x3
+; GFX10-SDAG-NEXT: global_load_dwordx4 v[4:7], v[0:1], off offset:32
+; GFX10-SDAG-NEXT: global_load_dwordx3 v[16:18], v[0:1], off offset:48
+; GFX10-SDAG-NEXT: global_load_dwordx4 v[8:11], v[0:1], off
+; GFX10-SDAG-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:16
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(3)
+; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[4:7], off offset:32
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(2)
+; GFX10-SDAG-NEXT: global_store_dwordx3 v[2:3], v[16:18], off offset:48
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(1)
+; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[8:11], off
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:16
+; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-GISEL-LABEL: freeze_v15i32:
+; GFX10-GISEL: ; %bb.0:
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-GISEL-NEXT: s_clause 0x3
+; GFX10-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off
+; GFX10-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16
+; GFX10-GISEL-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:32
+; GFX10-GISEL-NEXT: global_load_dwordx3 v[16:18], v[0:1], off offset:48
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(3)
+; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(2)
+; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(1)
+; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:32
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-GISEL-NEXT: global_store_dwordx3 v[2:3], v[16:18], off offset:48
+; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-SDAG-LABEL: freeze_v15i32:
+; GFX11-SDAG: ; %bb.0:
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-NEXT: s_clause 0x3
+; GFX11-SDAG-NEXT: global_load_b128 v[4:7], v[0:1], off offset:32
+; GFX11-SDAG-NEXT: global_load_b96 v[16:18], v[0:1], off offset:48
+; GFX11-SDAG-NEXT: global_load_b128 v[8:11], v[0:1], off
+; GFX11-SDAG-NEXT: global_load_b128 v[12:15], v[0:1], off offset:16
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(3)
+; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[4:7], off offset:32
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(2)
+; GFX11-SDAG-NEXT: global_store_b96 v[2:3], v[16:18], off offset:48
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(1)
+; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[8:11], off
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[12:15], off offset:16
+; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-LABEL: freeze_v15i32:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-NEXT: s_clause 0x3
+; GFX11-GISEL-NEXT: global_load_b128 v[4:7], v[0:1], off
+; GFX11-GISEL-NEXT: global_load_b128 v[8:11], v[0:1], off offset:16
+; GFX11-GISEL-NEXT: global_load_b128 v[12:15], v[0:1], off offset:32
+; GFX11-GISEL-NEXT: global_load_b96 v[16:18], v[0:1], off offset:48
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(3)
+; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[4:7], off
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(2)
+; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[8:11], off offset:16
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(1)
+; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[12:15], off offset:32
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GISEL-NEXT: global_store_b96 v[2:3], v[16:18], off offset:48
+; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
+ %a = load <15 x i32>, ptr addrspace(1) %ptra, align 4
+ %freeze = freeze <15 x i32> %a
+ store <15 x i32> %freeze, ptr addrspace(1) %ptrb, align 4
+ ret void
+}
+
+define void @freeze_v16i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
+; GFX10-SDAG-LABEL: freeze_v16i32:
+; GFX10-SDAG: ; %bb.0:
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-SDAG-NEXT: s_clause 0x3
+; GFX10-SDAG-NEXT: global_load_dwordx4 v[4:7], v[0:1], off offset:32
+; GFX10-SDAG-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:48
+; GFX10-SDAG-NEXT: global_load_dwordx4 v[12:15], v[0:1], off
+; GFX10-SDAG-NEXT: global_load_dwordx4 v[16:19], v[0:1], off offset:16
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(3)
+; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[4:7], off offset:32
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(2)
+; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:48
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(1)
+; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[12:15], off
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[16:19], off offset:16
+; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-GISEL-LABEL: freeze_v16i32:
+; GFX10-GISEL: ; %bb.0:
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-GISEL-NEXT: s_clause 0x3
+; GFX10-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off
+; GFX10-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16
+; GFX10-GISEL-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:32
+; GFX10-GISEL-NEXT: global_load_dwordx4 v[16:19], v[0:1], off offset:48
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(3)
+; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(2)
+; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(1)
+; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:32
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[16:19], off offset:48
+; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-SDAG-LABEL: freeze_v16i32:
+; GFX11-SDAG: ; %bb.0:
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-NEXT: s_clause 0x3
+; GFX11-SDAG-NEXT: global_load_b128 v[4:7], v[0:1], off offset:32
+; GFX11-SDAG-NEXT: global_load_b128 v[8:11], v[0:1], off offset:48
+; GFX11-SDAG-NEXT: global_load_b128 v[12:15], v[0:1], off
+; GFX11-SDAG-NEXT: global_load_b128 v[16:19], v[0:1], off offset:16
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(3)
+; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[4:7], off offset:32
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(2)
+; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[8:11], off offset:48
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(1)
+; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[12:15], off
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[16:19], off offset:16
+; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-LABEL: freeze_v16i32:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-NEXT: s_clause 0x3
+; GFX11-GISEL-NEXT: global_load_b128 v[4:7], v[0:1], off
+; GFX11-GISEL-NEXT: global_load_b128 v[8:11], v[0:1], off offset:16
+; GFX11-GISEL-NEXT: global_load_b128 v[12:15], v[0:1], off offset:32
+; GFX11-GISEL-NEXT: global_load_b128 v[16:19], v[0:1], off offset:48
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(3)
+; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[4:7], off
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(2)
+; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[8:11], off offset:16
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(1)
+; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[12:15], off offset:32
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[16:19], off offset:48
+; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
+ %a = load <16 x i32>, ptr addrspace(1) %ptra, align 4
+ %freeze = freeze <16 x i32> %a
+ store <16 x i32> %freeze, ptr addrspace(1) %ptrb, align 4
+ ret void
+}
+
+define void @freeze_v17i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
+; GFX10-SDAG-LABEL: freeze_v17i32:
+; GFX10-SDAG: ; %bb.0:
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-SDAG-NEXT: s_clause 0x4
+; GFX10-SDAG-NEXT: global_load_dword v20, v[0:1], off offset:64
+; GFX10-SDAG-NEXT: global_load_dwordx4 v[4:7], v[0:1], off offset:32
+; GFX10-SDAG-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:48
+; GFX10-SDAG-NEXT: global_load_dwordx4 v[12:15], v[0:1], off
+; GFX10-SDAG-NEXT: global_load_dwordx4 v[16:19], v[0:1], off offset:16
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(4)
+; GFX10-SDAG-NEXT: global_store_dword v[2:3], v20, off offset:64
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(3)
+; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[4:7], off offset:32
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(2)
+; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:48
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(1)
+; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[12:15], off
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[16:19], off offset:16
+; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-GISEL-LABEL: freeze_v17i32:
+; GFX10-GISEL: ; %bb.0:
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-GISEL-NEXT: s_clause 0x4
+; GFX10-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off
+; GFX10-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16
+; GFX10-GISEL-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:32
+; GFX10-GISEL-NEXT: global_load_dwordx4 v[16:19], v[0:1], off offset:48
+; GFX10-GISEL-NEXT: global_load_dword v20, v[0:1], off offset:64
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(4)
+; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(3)
+; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(2)
+; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:32
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(1)
+; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[16:19], off offset:48
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-GISEL-NEXT: global_store_dword v[2:3], v20, off offset:64
+; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-SDAG-LABEL: freeze_v17i32:
+; GFX11-SDAG: ; %bb.0:
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-NEXT: s_clause 0x4
+; GFX11-SDAG-NEXT: global_load_b32 v20, v[0:1], off offset:64
+; GFX11-SDAG-NEXT: global_load_b128 v[4:7], v[0:1], off offset:32
+; GFX11-SDAG-NEXT: global_load_b128 v[8:11], v[0:1], off offset:48
+; GFX11-SDAG-NEXT: global_load_b128 v[12:15], v[0:1], off
+; GFX11-SDAG-NEXT: global_load_b128 v[16:19], v[0:1], off offset:16
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(4)
+; GFX11-SDAG-NEXT: global_store_b32 v[2:3], v20, off offset:64
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(3)
+; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[4:7], off offset:32
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(2)
+; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[8:11], off offset:48
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(1)
+; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[12:15], off
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[16:19], off offset:16
+; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-LABEL: freeze_v17i32:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-NEXT: s_clause 0x4
+; GFX11-GISEL-NEXT: global_load_b128 v[4:7], v[0:1], off
+; GFX11-GISEL-NEXT: global_load_b128 v[8:11], v[0:1], off offset:16
+; GFX11-GISEL-NEXT: global_load_b128 v[12:15], v[0:1], off offset:32
+; GFX11-GISEL-NEXT: global_load_b128 v[16:19], v[0:1], off offset:48
+; GFX11-GISEL-NEXT: global_load_b32 v0, v[0:1], off offset:64
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(4)
+; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[4:7], off
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(3)
+; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[8:11], off offset:16
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(2)
+; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[12:15], off offset:32
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(1)
+; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[16:19], off offset:48
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GISEL-NEXT: global_store_b32 v[2:3], v0, off offset:64
+; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
+ %a = load <17 x i32>, ptr addrspace(1) %ptra, align 4
+ %freeze = freeze <17 x i32> %a
+ store <17 x i32> %freeze, ptr addrspace(1) %ptrb, align 4
+ ret void
+}
+
+define void @freeze_v18i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
+; GFX10-SDAG-LABEL: freeze_v18i32:
+; GFX10-SDAG: ; %bb.0:
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-SDAG-NEXT: s_clause 0x4
+; GFX10-SDAG-NEXT: global_load_dwordx2 v[20:21], v[0:1], off offset:64
+; GFX10-SDAG-NEXT: global_load_dwordx4 v[4:7], v[0:1], off offset:32
+; GFX10-SDAG-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:48
+; GFX10-SDAG-NEXT: global_load_dwordx4 v[12:15], v[0:1], off
+; GFX10-SDAG-NEXT: global_load_dwordx4 v[16:19], v[0:1], off offset:16
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(4)
+; GFX10-SDAG-NEXT: global_store_dwordx2 v[2:3], v[20:21], off offset:64
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(3)
+; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[4:7], off offset:32
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(2)
+; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:48
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(1)
+; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[12:15], off
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[16:19], off offset:16
+; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-GISEL-LABEL: freeze_v18i32:
+; GFX10-GISEL: ; %bb.0:
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-GISEL-NEXT: s_clause 0x4
+; GFX10-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off
+; GFX10-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16
+; GFX10-GISEL-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:32
+; GFX10-GISEL-NEXT: global_load_dwordx4 v[16:19], v[0:1], off offset:48
+; GFX10-GISEL-NEXT: global_load_dwordx2 v[20:21], v[0:1], off offset:64
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(4)
+; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(3)
+; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(2)
+; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:32
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(1)
+; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[16:19], off offset:48
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-GISEL-NEXT: global_store_dwordx2 v[2:3], v[20:21], off offset:64
+; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-SDAG-LABEL: freeze_v18i32:
+; GFX11-SDAG: ; %bb.0:
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-NEXT: s_clause 0x4
+; GFX11-SDAG-NEXT: global_load_b64 v[20:21], v[0:1], off offset:64
+; GFX11-SDAG-NEXT: global_load_b128 v[4:7], v[0:1], off offset:32
+; GFX11-SDAG-NEXT: global_load_b128 v[8:11], v[0:1], off offset:48
+; GFX11-SDAG-NEXT: global_load_b128 v[12:15], v[0:1], off
+; GFX11-SDAG-NEXT: global_load_b128 v[16:19], v[0:1], off offset:16
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(4)
+; GFX11-SDAG-NEXT: global_store_b64 v[2:3], v[20:21], off offset:64
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(3)
+; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[4:7], off offset:32
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(2)
+; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[8:11], off offset:48
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(1)
+; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[12:15], off
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[16:19], off offset:16
+; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-LABEL: freeze_v18i32:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-NEXT: s_clause 0x4
+; GFX11-GISEL-NEXT: global_load_b128 v[4:7], v[0:1], off
+; GFX11-GISEL-NEXT: global_load_b128 v[8:11], v[0:1], off offset:16
+; GFX11-GISEL-NEXT: global_load_b128 v[12:15], v[0:1], off offset:32
+; GFX11-GISEL-NEXT: global_load_b128 v[16:19], v[0:1], off offset:48
+; GFX11-GISEL-NEXT: global_load_b64 v[0:1], v[0:1], off offset:64
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(4)
+; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[4:7], off
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(3)
+; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[8:11], off offset:16
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(2)
+; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[12:15], off offset:32
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(1)
+; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[16:19], off offset:48
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GISEL-NEXT: global_store_b64 v[2:3], v[0:1], off offset:64
+; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
+ %a = load <18 x i32>, ptr addrspace(1) %ptra, align 4
+ %freeze = freeze <18 x i32> %a
+ store <18 x i32> %freeze, ptr addrspace(1) %ptrb, align 4
+ ret void
+}
+
+define void @freeze_v19i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
+; GFX10-SDAG-LABEL: freeze_v19i32:
+; GFX10-SDAG: ; %bb.0:
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-SDAG-NEXT: s_clause 0x4
+; GFX10-SDAG-NEXT: global_load_dwordx3 v[20:22], v[0:1], off offset:64
+; GFX10-SDAG-NEXT: global_load_dwordx4 v[4:7], v[0:1], off offset:32
+; GFX10-SDAG-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:48
+; GFX10-SDAG-NEXT: global_load_dwordx4 v[12:15], v[0:1], off
+; GFX10-SDAG-NEXT: global_load_dwordx4 v[16:19], v[0:1], off offset:16
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(4)
+; GFX10-SDAG-NEXT: global_store_dwordx3 v[2:3], v[20:22], off offset:64
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(3)
+; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[4:7], off offset:32
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(2)
+; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:48
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(1)
+; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[12:15], off
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[16:19], off offset:16
+; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-GISEL-LABEL: freeze_v19i32:
+; GFX10-GISEL: ; %bb.0:
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-GISEL-NEXT: s_clause 0x4
+; GFX10-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off
+; GFX10-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16
+; GFX10-GISEL-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:32
+; GFX10-GISEL-NEXT: global_load_dwordx4 v[16:19], v[0:1], off offset:48
+; GFX10-GISEL-NEXT: global_load_dwordx3 v[20:22], v[0:1], off offset:64
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(4)
+; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(3)
+; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(2)
+; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:32
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(1)
+; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[16:19], off offset:48
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-GISEL-NEXT: global_store_dwordx3 v[2:3], v[20:22], off offset:64
+; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-SDAG-LABEL: freeze_v19i32:
+; GFX11-SDAG: ; %bb.0:
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-NEXT: s_clause 0x4
+; GFX11-SDAG-NEXT: global_load_b96 v[20:22], v[0:1], off offset:64
+; GFX11-SDAG-NEXT: global_load_b128 v[4:7], v[0:1], off offset:32
+; GFX11-SDAG-NEXT: global_load_b128 v[8:11], v[0:1], off offset:48
+; GFX11-SDAG-NEXT: global_load_b128 v[12:15], v[0:1], off
+; GFX11-SDAG-NEXT: global_load_b128 v[16:19], v[0:1], off offset:16
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(4)
+; GFX11-SDAG-NEXT: global_store_b96 v[2:3], v[20:22], off offset:64
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(3)
+; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[4:7], off offset:32
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(2)
+; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[8:11], off offset:48
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(1)
+; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[12:15], off
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[16:19], off offset:16
+; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-LABEL: freeze_v19i32:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-NEXT: s_clause 0x4
+; GFX11-GISEL-NEXT: global_load_b128 v[4:7], v[0:1], off
+; GFX11-GISEL-NEXT: global_load_b128 v[8:11], v[0:1], off offset:16
+; GFX11-GISEL-NEXT: global_load_b128 v[12:15], v[0:1], off offset:32
+; GFX11-GISEL-NEXT: global_load_b128 v[16:19], v[0:1], off offset:48
+; GFX11-GISEL-NEXT: global_load_b96 v[20:22], v[0:1], off offset:64
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(4)
+; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[4:7], off
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(3)
+; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[8:11], off offset:16
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(2)
+; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[12:15], off offset:32
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(1)
+; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[16:19], off offset:48
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GISEL-NEXT: global_store_b96 v[2:3], v[20:22], off offset:64
+; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
+ %a = load <19 x i32>, ptr addrspace(1) %ptra, align 4
+ %freeze = freeze <19 x i32> %a
+ store <19 x i32> %freeze, ptr addrspace(1) %ptrb, align 4
+ ret void
+}
+
+define void @freeze_v20i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
+; GFX10-SDAG-LABEL: freeze_v20i32:
+; GFX10-SDAG: ; %bb.0:
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-SDAG-NEXT: s_clause 0x4
+; GFX10-SDAG-NEXT: global_load_dwordx4 v[4:7], v[0:1], off offset:64
+; GFX10-SDAG-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:32
+; GFX10-SDAG-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:48
+; GFX10-SDAG-NEXT: global_load_dwordx4 v[16:19], v[0:1], off
+; GFX10-SDAG-NEXT: global_load_dwordx4 v[20:23], v[0:1], off offset:16
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(4)
+; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[4:7], off offset:64
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(3)
+; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:32
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(2)
+; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:48
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(1)
+; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[16:19], off
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[20:23], off offset:16
+; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-GISEL-LABEL: freeze_v20i32:
+; GFX10-GISEL: ; %bb.0:
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-GISEL-NEXT: s_clause 0x4
+; GFX10-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off
+; GFX10-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16
+; GFX10-GISEL-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:32
+; GFX10-GISEL-NEXT: global_load_dwordx4 v[16:19], v[0:1], off offset:48
+; GFX10-GISEL-NEXT: global_load_dwordx4 v[20:23], v[0:1], off offset:64
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(4)
+; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(3)
+; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(2)
+; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:32
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(1)
+; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[16:19], off offset:48
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[20:23], off offset:64
+; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-SDAG-LABEL: freeze_v20i32:
+; GFX11-SDAG: ; %bb.0:
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-NEXT: s_clause 0x4
+; GFX11-SDAG-NEXT: global_load_b128 v[4:7], v[0:1], off offset:64
+; GFX11-SDAG-NEXT: global_load_b128 v[8:11], v[0:1], off offset:32
+; GFX11-SDAG-NEXT: global_load_b128 v[12:15], v[0:1], off offset:48
+; GFX11-SDAG-NEXT: global_load_b128 v[16:19], v[0:1], off
+; GFX11-SDAG-NEXT: global_load_b128 v[20:23], v[0:1], off offset:16
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(4)
+; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[4:7], off offset:64
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(3)
+; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[8:11], off offset:32
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(2)
+; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[12:15], off offset:48
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(1)
+; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[16:19], off
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[20:23], off offset:16
+; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-LABEL: freeze_v20i32:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-NEXT: s_clause 0x4
+; GFX11-GISEL-NEXT: global_load_b128 v[4:7], v[0:1], off
+; GFX11-GISEL-NEXT: global_load_b128 v[8:11], v[0:1], off offset:16
+; GFX11-GISEL-NEXT: global_load_b128 v[12:15], v[0:1], off offset:32
+; GFX11-GISEL-NEXT: global_load_b128 v[16:19], v[0:1], off offset:48
+; GFX11-GISEL-NEXT: global_load_b128 v[20:23], v[0:1], off offset:64
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(4)
+; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[4:7], off
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(3)
+; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[8:11], off offset:16
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(2)
+; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[12:15], off offset:32
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(1)
+; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[16:19], off offset:48
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[20:23], off offset:64
+; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
+ %a = load <20 x i32>, ptr addrspace(1) %ptra, align 4
+ %freeze = freeze <20 x i32> %a
+ store <20 x i32> %freeze, ptr addrspace(1) %ptrb, align 4
+ ret void
+}
+
+define void @freeze_v21i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
+; GFX10-SDAG-LABEL: freeze_v21i32:
+; GFX10-SDAG: ; %bb.0:
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-SDAG-NEXT: s_clause 0x5
+; GFX10-SDAG-NEXT: global_load_dword v24, v[0:1], off offset:80
+; GFX10-SDAG-NEXT: global_load_dwordx4 v[4:7], v[0:1], off offset:64
+; GFX10-SDAG-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:32
+; GFX10-SDAG-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:48
+; GFX10-SDAG-NEXT: global_load_dwordx4 v[16:19], v[0:1], off
+; GFX10-SDAG-NEXT: global_load_dwordx4 v[20:23], v[0:1], off offset:16
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(5)
+; GFX10-SDAG-NEXT: global_store_dword v[2:3], v24, off offset:80
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(4)
+; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[4:7], off offset:64
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(3)
+; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:32
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(2)
+; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:48
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(1)
+; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[16:19], off
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[20:23], off offset:16
+; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-GISEL-LABEL: freeze_v21i32:
+; GFX10-GISEL: ; %bb.0:
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-GISEL-NEXT: s_clause 0x5
+; GFX10-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off
+; GFX10-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16
+; GFX10-GISEL-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:32
+; GFX10-GISEL-NEXT: global_load_dwordx4 v[16:19], v[0:1], off offset:48
+; GFX10-GISEL-NEXT: global_load_dwordx4 v[20:23], v[0:1], off offset:64
+; GFX10-GISEL-NEXT: global_load_dword v24, v[0:1], off offset:80
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(5)
+; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(4)
+; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(3)
+; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:32
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(2)
+; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[16:19], off offset:48
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(1)
+; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[20:23], off offset:64
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-GISEL-NEXT: global_store_dword v[2:3], v24, off offset:80
+; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-SDAG-LABEL: freeze_v21i32:
+; GFX11-SDAG: ; %bb.0:
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-NEXT: s_clause 0x5
+; GFX11-SDAG-NEXT: global_load_b32 v24, v[0:1], off offset:80
+; GFX11-SDAG-NEXT: global_load_b128 v[4:7], v[0:1], off offset:64
+; GFX11-SDAG-NEXT: global_load_b128 v[8:11], v[0:1], off offset:32
+; GFX11-SDAG-NEXT: global_load_b128 v[12:15], v[0:1], off offset:48
+; GFX11-SDAG-NEXT: global_load_b128 v[16:19], v[0:1], off
+; GFX11-SDAG-NEXT: global_load_b128 v[20:23], v[0:1], off offset:16
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(5)
+; GFX11-SDAG-NEXT: global_store_b32 v[2:3], v24, off offset:80
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(4)
+; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[4:7], off offset:64
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(3)
+; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[8:11], off offset:32
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(2)
+; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[12:15], off offset:48
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(1)
+; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[16:19], off
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[20:23], off offset:16
+; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-LABEL: freeze_v21i32:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-NEXT: s_clause 0x5
+; GFX11-GISEL-NEXT: global_load_b128 v[4:7], v[0:1], off
+; GFX11-GISEL-NEXT: global_load_b128 v[8:11], v[0:1], off offset:16
+; GFX11-GISEL-NEXT: global_load_b128 v[12:15], v[0:1], off offset:32
+; GFX11-GISEL-NEXT: global_load_b128 v[16:19], v[0:1], off offset:48
+; GFX11-GISEL-NEXT: global_load_b128 v[20:23], v[0:1], off offset:64
+; GFX11-GISEL-NEXT: global_load_b32 v0, v[0:1], off offset:80
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(5)
+; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[4:7], off
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(4)
+; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[8:11], off offset:16
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(3)
+; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[12:15], off offset:32
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(2)
+; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[16:19], off offset:48
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(1)
+; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[20:23], off offset:64
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GISEL-NEXT: global_store_b32 v[2:3], v0, off offset:80
+; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
+ %a = load <21 x i32>, ptr addrspace(1) %ptra, align 4
+ %freeze = freeze <21 x i32> %a
+ store <21 x i32> %freeze, ptr addrspace(1) %ptrb, align 4
+ ret void
+}
+
+define void @freeze_v22i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
+; GFX10-SDAG-LABEL: freeze_v22i32:
+; GFX10-SDAG: ; %bb.0:
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-SDAG-NEXT: s_clause 0x5
+; GFX10-SDAG-NEXT: global_load_dwordx4 v[4:7], v[0:1], off offset:64
+; GFX10-SDAG-NEXT: global_load_dwordx2 v[24:25], v[0:1], off offset:80
+; GFX10-SDAG-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:32
+; GFX10-SDAG-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:48
+; GFX10-SDAG-NEXT: global_load_dwordx4 v[16:19], v[0:1], off
+; GFX10-SDAG-NEXT: global_load_dwordx4 v[20:23], v[0:1], off offset:16
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(5)
+; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[4:7], off offset:64
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(4)
+; GFX10-SDAG-NEXT: global_store_dwordx2 v[2:3], v[24:25], off offset:80
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(3)
+; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:32
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(2)
+; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:48
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(1)
+; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[16:19], off
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[20:23], off offset:16
+; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-GISEL-LABEL: freeze_v22i32:
+; GFX10-GISEL: ; %bb.0:
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-GISEL-NEXT: s_clause 0x5
+; GFX10-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off
+; GFX10-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16
+; GFX10-GISEL-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:32
+; GFX10-GISEL-NEXT: global_load_dwordx4 v[16:19], v[0:1], off offset:48
+; GFX10-GISEL-NEXT: global_load_dwordx4 v[20:23], v[0:1], off offset:64
+; GFX10-GISEL-NEXT: global_load_dwordx2 v[24:25], v[0:1], off offset:80
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(5)
+; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(4)
+; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(3)
+; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:32
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(2)
+; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[16:19], off offset:48
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(1)
+; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[20:23], off offset:64
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-GISEL-NEXT: global_store_dwordx2 v[2:3], v[24:25], off offset:80
+; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-SDAG-LABEL: freeze_v22i32:
+; GFX11-SDAG: ; %bb.0:
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-NEXT: s_clause 0x5
+; GFX11-SDAG-NEXT: global_load_b128 v[4:7], v[0:1], off offset:64
+; GFX11-SDAG-NEXT: global_load_b64 v[24:25], v[0:1], off offset:80
+; GFX11-SDAG-NEXT: global_load_b128 v[8:11], v[0:1], off offset:32
+; GFX11-SDAG-NEXT: global_load_b128 v[12:15], v[0:1], off offset:48
+; GFX11-SDAG-NEXT: global_load_b128 v[16:19], v[0:1], off
+; GFX11-SDAG-NEXT: global_load_b128 v[20:23], v[0:1], off offset:16
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(5)
+; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[4:7], off offset:64
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(4)
+; GFX11-SDAG-NEXT: global_store_b64 v[2:3], v[24:25], off offset:80
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(3)
+; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[8:11], off offset:32
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(2)
+; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[12:15], off offset:48
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(1)
+; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[16:19], off
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[20:23], off offset:16
+; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-LABEL: freeze_v22i32:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-NEXT: s_clause 0x5
+; GFX11-GISEL-NEXT: global_load_b128 v[4:7], v[0:1], off
+; GFX11-GISEL-NEXT: global_load_b128 v[8:11], v[0:1], off offset:16
+; GFX11-GISEL-NEXT: global_load_b128 v[12:15], v[0:1], off offset:32
+; GFX11-GISEL-NEXT: global_load_b128 v[16:19], v[0:1], off offset:48
+; GFX11-GISEL-NEXT: global_load_b128 v[20:23], v[0:1], off offset:64
+; GFX11-GISEL-NEXT: global_load_b64 v[0:1], v[0:1], off offset:80
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(5)
+; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[4:7], off
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(4)
+; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[8:11], off offset:16
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(3)
+; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[12:15], off offset:32
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(2)
+; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[16:19], off offset:48
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(1)
+; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[20:23], off offset:64
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GISEL-NEXT: global_store_b64 v[2:3], v[0:1], off offset:80
+; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
+ %a = load <22 x i32>, ptr addrspace(1) %ptra, align 4
+ %freeze = freeze <22 x i32> %a
+ store <22 x i32> %freeze, ptr addrspace(1) %ptrb, align 4
+ ret void
+}
+
+define void @freeze_v30i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
+; GFX10-SDAG-LABEL: freeze_v30i32:
+; GFX10-SDAG: ; %bb.0:
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-SDAG-NEXT: s_clause 0x7
+; GFX10-SDAG-NEXT: global_load_dwordx4 v[4:7], v[0:1], off offset:96
+; GFX10-SDAG-NEXT: global_load_dwordx2 v[32:33], v[0:1], off offset:112
+; GFX10-SDAG-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:64
+; GFX10-SDAG-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:80
+; GFX10-SDAG-NEXT: global_load_dwordx4 v[16:19], v[0:1], off offset:32
+; GFX10-SDAG-NEXT: global_load_dwordx4 v[20:23], v[0:1], off offset:48
+; GFX10-SDAG-NEXT: global_load_dwordx4 v[24:27], v[0:1], off
+; GFX10-SDAG-NEXT: global_load_dwordx4 v[28:31], v[0:1], off offset:16
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(7)
+; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[4:7], off offset:96
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(6)
+; GFX10-SDAG-NEXT: global_store_dwordx2 v[2:3], v[32:33], off offset:112
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(5)
+; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:64
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(4)
+; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:80
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(3)
+; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[16:19], off offset:32
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(2)
+; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[20:23], off offset:48
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(1)
+; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[24:27], off
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[28:31], off offset:16
+; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-GISEL-LABEL: freeze_v30i32:
+; GFX10-GISEL: ; %bb.0:
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-GISEL-NEXT: s_clause 0x7
+; GFX10-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off
+; GFX10-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16
+; GFX10-GISEL-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:32
+; GFX10-GISEL-NEXT: global_load_dwordx4 v[16:19], v[0:1], off offset:48
+; GFX10-GISEL-NEXT: global_load_dwordx4 v[20:23], v[0:1], off offset:64
+; GFX10-GISEL-NEXT: global_load_dwordx4 v[24:27], v[0:1], off offset:80
+; GFX10-GISEL-NEXT: global_load_dwordx4 v[28:31], v[0:1], off offset:96
+; GFX10-GISEL-NEXT: global_load_dwordx2 v[32:33], v[0:1], off offset:112
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(7)
+; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(6)
+; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(5)
+; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:32
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(4)
+; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[16:19], off offset:48
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(3)
+; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[20:23], off offset:64
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(2)
+; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[24:27], off offset:80
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(1)
+; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[28:31], off offset:96
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-GISEL-NEXT: global_store_dwordx2 v[2:3], v[32:33], off offset:112
+; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-SDAG-LABEL: freeze_v30i32:
+; GFX11-SDAG: ; %bb.0:
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-NEXT: s_clause 0x7
+; GFX11-SDAG-NEXT: global_load_b128 v[4:7], v[0:1], off offset:96
+; GFX11-SDAG-NEXT: global_load_b64 v[32:33], v[0:1], off offset:112
+; GFX11-SDAG-NEXT: global_load_b128 v[8:11], v[0:1], off offset:64
+; GFX11-SDAG-NEXT: global_load_b128 v[12:15], v[0:1], off offset:80
+; GFX11-SDAG-NEXT: global_load_b128 v[16:19], v[0:1], off offset:32
+; GFX11-SDAG-NEXT: global_load_b128 v[20:23], v[0:1], off offset:48
+; GFX11-SDAG-NEXT: global_load_b128 v[24:27], v[0:1], off
+; GFX11-SDAG-NEXT: global_load_b128 v[28:31], v[0:1], off offset:16
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(7)
+; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[4:7], off offset:96
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(6)
+; GFX11-SDAG-NEXT: global_store_b64 v[2:3], v[32:33], off offset:112
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(5)
+; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[8:11], off offset:64
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(4)
+; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[12:15], off offset:80
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(3)
+; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[16:19], off offset:32
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(2)
+; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[20:23], off offset:48
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(1)
+; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[24:27], off
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[28:31], off offset:16
+; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-LABEL: freeze_v30i32:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-NEXT: s_clause 0x7
+; GFX11-GISEL-NEXT: global_load_b128 v[4:7], v[0:1], off
+; GFX11-GISEL-NEXT: global_load_b128 v[8:11], v[0:1], off offset:16
+; GFX11-GISEL-NEXT: global_load_b128 v[12:15], v[0:1], off offset:32
+; GFX11-GISEL-NEXT: global_load_b128 v[16:19], v[0:1], off offset:48
+; GFX11-GISEL-NEXT: global_load_b128 v[20:23], v[0:1], off offset:64
+; GFX11-GISEL-NEXT: global_load_b128 v[24:27], v[0:1], off offset:80
+; GFX11-GISEL-NEXT: global_load_b128 v[28:31], v[0:1], off offset:96
+; GFX11-GISEL-NEXT: global_load_b64 v[0:1], v[0:1], off offset:112
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(7)
+; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[4:7], off
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(6)
+; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[8:11], off offset:16
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(5)
+; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[12:15], off offset:32
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(4)
+; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[16:19], off offset:48
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(3)
+; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[20:23], off offset:64
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(2)
+; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[24:27], off offset:80
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(1)
+; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[28:31], off offset:96
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GISEL-NEXT: global_store_b64 v[2:3], v[0:1], off offset:112
+; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
+ %a = load <30 x i32>, ptr addrspace(1) %ptra, align 4
+ %freeze = freeze <30 x i32> %a
+ store <30 x i32> %freeze, ptr addrspace(1) %ptrb, align 4
+ ret void
+}
+
+define void @freeze_v31i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
+; GFX10-SDAG-LABEL: freeze_v31i32:
+; GFX10-SDAG: ; %bb.0:
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-SDAG-NEXT: s_clause 0x7
+; GFX10-SDAG-NEXT: global_load_dwordx4 v[4:7], v[0:1], off offset:96
+; GFX10-SDAG-NEXT: global_load_dwordx3 v[32:34], v[0:1], off offset:112
+; GFX10-SDAG-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:64
+; GFX10-SDAG-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:80
+; GFX10-SDAG-NEXT: global_load_dwordx4 v[16:19], v[0:1], off offset:32
+; GFX10-SDAG-NEXT: global_load_dwordx4 v[20:23], v[0:1], off offset:48
+; GFX10-SDAG-NEXT: global_load_dwordx4 v[24:27], v[0:1], off
+; GFX10-SDAG-NEXT: global_load_dwordx4 v[28:31], v[0:1], off offset:16
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(7)
+; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[4:7], off offset:96
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(6)
+; GFX10-SDAG-NEXT: global_store_dwordx3 v[2:3], v[32:34], off offset:112
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(5)
+; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:64
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(4)
+; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:80
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(3)
+; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[16:19], off offset:32
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(2)
+; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[20:23], off offset:48
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(1)
+; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[24:27], off
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[28:31], off offset:16
+; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-GISEL-LABEL: freeze_v31i32:
+; GFX10-GISEL: ; %bb.0:
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-GISEL-NEXT: s_clause 0x7
+; GFX10-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off
+; GFX10-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16
+; GFX10-GISEL-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:32
+; GFX10-GISEL-NEXT: global_load_dwordx4 v[16:19], v[0:1], off offset:48
+; GFX10-GISEL-NEXT: global_load_dwordx4 v[20:23], v[0:1], off offset:64
+; GFX10-GISEL-NEXT: global_load_dwordx4 v[24:27], v[0:1], off offset:80
+; GFX10-GISEL-NEXT: global_load_dwordx4 v[28:31], v[0:1], off offset:96
+; GFX10-GISEL-NEXT: global_load_dwordx3 v[32:34], v[0:1], off offset:112
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(7)
+; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(6)
+; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(5)
+; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:32
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(4)
+; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[16:19], off offset:48
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(3)
+; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[20:23], off offset:64
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(2)
+; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[24:27], off offset:80
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(1)
+; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[28:31], off offset:96
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-GISEL-NEXT: global_store_dwordx3 v[2:3], v[32:34], off offset:112
+; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-SDAG-LABEL: freeze_v31i32:
+; GFX11-SDAG: ; %bb.0:
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-NEXT: s_clause 0x7
+; GFX11-SDAG-NEXT: global_load_b128 v[4:7], v[0:1], off offset:96
+; GFX11-SDAG-NEXT: global_load_b96 v[32:34], v[0:1], off offset:112
+; GFX11-SDAG-NEXT: global_load_b128 v[8:11], v[0:1], off offset:64
+; GFX11-SDAG-NEXT: global_load_b128 v[12:15], v[0:1], off offset:80
+; GFX11-SDAG-NEXT: global_load_b128 v[16:19], v[0:1], off offset:32
+; GFX11-SDAG-NEXT: global_load_b128 v[20:23], v[0:1], off offset:48
+; GFX11-SDAG-NEXT: global_load_b128 v[24:27], v[0:1], off
+; GFX11-SDAG-NEXT: global_load_b128 v[28:31], v[0:1], off offset:16
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(7)
+; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[4:7], off offset:96
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(6)
+; GFX11-SDAG-NEXT: global_store_b96 v[2:3], v[32:34], off offset:112
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(5)
+; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[8:11], off offset:64
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(4)
+; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[12:15], off offset:80
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(3)
+; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[16:19], off offset:32
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(2)
+; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[20:23], off offset:48
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(1)
+; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[24:27], off
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[28:31], off offset:16
+; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-LABEL: freeze_v31i32:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-NEXT: s_clause 0x7
+; GFX11-GISEL-NEXT: global_load_b128 v[4:7], v[0:1], off
+; GFX11-GISEL-NEXT: global_load_b128 v[8:11], v[0:1], off offset:16
+; GFX11-GISEL-NEXT: global_load_b128 v[12:15], v[0:1], off offset:32
+; GFX11-GISEL-NEXT: global_load_b128 v[16:19], v[0:1], off offset:48
+; GFX11-GISEL-NEXT: global_load_b128 v[20:23], v[0:1], off offset:64
+; GFX11-GISEL-NEXT: global_load_b128 v[24:27], v[0:1], off offset:80
+; GFX11-GISEL-NEXT: global_load_b128 v[28:31], v[0:1], off offset:96
+; GFX11-GISEL-NEXT: global_load_b96 v[32:34], v[0:1], off offset:112
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(7)
+; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[4:7], off
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(6)
+; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[8:11], off offset:16
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(5)
+; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[12:15], off offset:32
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(4)
+; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[16:19], off offset:48
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(3)
+; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[20:23], off offset:64
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(2)
+; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[24:27], off offset:80
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(1)
+; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[28:31], off offset:96
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GISEL-NEXT: global_store_b96 v[2:3], v[32:34], off offset:112
+; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
+ %a = load <31 x i32>, ptr addrspace(1) %ptra, align 4
+ %freeze = freeze <31 x i32> %a
+ store <31 x i32> %freeze, ptr addrspace(1) %ptrb, align 4
+ ret void
+}
+
+define void @freeze_v32i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
+; GFX10-SDAG-LABEL: freeze_v32i32:
+; GFX10-SDAG: ; %bb.0:
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-SDAG-NEXT: s_clause 0x7
+; GFX10-SDAG-NEXT: global_load_dwordx4 v[4:7], v[0:1], off offset:96
+; GFX10-SDAG-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:112
+; GFX10-SDAG-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:64
+; GFX10-SDAG-NEXT: global_load_dwordx4 v[16:19], v[0:1], off offset:80
+; GFX10-SDAG-NEXT: global_load_dwordx4 v[20:23], v[0:1], off offset:32
+; GFX10-SDAG-NEXT: global_load_dwordx4 v[24:27], v[0:1], off offset:48
+; GFX10-SDAG-NEXT: global_load_dwordx4 v[28:31], v[0:1], off
+; GFX10-SDAG-NEXT: global_load_dwordx4 v[32:35], v[0:1], off offset:16
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(7)
+; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[4:7], off offset:96
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(6)
+; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:112
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(5)
+; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:64
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(4)
+; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[16:19], off offset:80
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(3)
+; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[20:23], off offset:32
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(2)
+; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[24:27], off offset:48
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(1)
+; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[28:31], off
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[32:35], off offset:16
+; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-GISEL-LABEL: freeze_v32i32:
+; GFX10-GISEL: ; %bb.0:
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-GISEL-NEXT: s_clause 0x7
+; GFX10-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off
+; GFX10-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16
+; GFX10-GISEL-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:32
+; GFX10-GISEL-NEXT: global_load_dwordx4 v[16:19], v[0:1], off offset:48
+; GFX10-GISEL-NEXT: global_load_dwordx4 v[20:23], v[0:1], off offset:64
+; GFX10-GISEL-NEXT: global_load_dwordx4 v[24:27], v[0:1], off offset:80
+; GFX10-GISEL-NEXT: global_load_dwordx4 v[28:31], v[0:1], off offset:96
+; GFX10-GISEL-NEXT: global_load_dwordx4 v[32:35], v[0:1], off offset:112
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(7)
+; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(6)
+; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(5)
+; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:32
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(4)
+; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[16:19], off offset:48
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(3)
+; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[20:23], off offset:64
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(2)
+; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[24:27], off offset:80
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(1)
+; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[28:31], off offset:96
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[32:35], off offset:112
+; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-SDAG-LABEL: freeze_v32i32:
+; GFX11-SDAG: ; %bb.0:
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-NEXT: s_clause 0x7
+; GFX11-SDAG-NEXT: global_load_b128 v[4:7], v[0:1], off offset:96
+; GFX11-SDAG-NEXT: global_load_b128 v[8:11], v[0:1], off offset:112
+; GFX11-SDAG-NEXT: global_load_b128 v[12:15], v[0:1], off offset:64
+; GFX11-SDAG-NEXT: global_load_b128 v[16:19], v[0:1], off offset:80
+; GFX11-SDAG-NEXT: global_load_b128 v[20:23], v[0:1], off offset:32
+; GFX11-SDAG-NEXT: global_load_b128 v[24:27], v[0:1], off offset:48
+; GFX11-SDAG-NEXT: global_load_b128 v[28:31], v[0:1], off
+; GFX11-SDAG-NEXT: global_load_b128 v[32:35], v[0:1], off offset:16
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(7)
+; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[4:7], off offset:96
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(6)
+; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[8:11], off offset:112
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(5)
+; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[12:15], off offset:64
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(4)
+; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[16:19], off offset:80
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(3)
+; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[20:23], off offset:32
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(2)
+; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[24:27], off offset:48
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(1)
+; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[28:31], off
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[32:35], off offset:16
+; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-LABEL: freeze_v32i32:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-NEXT: s_clause 0x7
+; GFX11-GISEL-NEXT: global_load_b128 v[4:7], v[0:1], off
+; GFX11-GISEL-NEXT: global_load_b128 v[8:11], v[0:1], off offset:16
+; GFX11-GISEL-NEXT: global_load_b128 v[12:15], v[0:1], off offset:32
+; GFX11-GISEL-NEXT: global_load_b128 v[16:19], v[0:1], off offset:48
+; GFX11-GISEL-NEXT: global_load_b128 v[20:23], v[0:1], off offset:64
+; GFX11-GISEL-NEXT: global_load_b128 v[24:27], v[0:1], off offset:80
+; GFX11-GISEL-NEXT: global_load_b128 v[28:31], v[0:1], off offset:96
+; GFX11-GISEL-NEXT: global_load_b128 v[32:35], v[0:1], off offset:112
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(7)
+; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[4:7], off
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(6)
+; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[8:11], off offset:16
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(5)
+; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[12:15], off offset:32
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(4)
+; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[16:19], off offset:48
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(3)
+; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[20:23], off offset:64
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(2)
+; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[24:27], off offset:80
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(1)
+; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[28:31], off offset:96
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[32:35], off offset:112
+; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
+ %a = load <32 x i32>, ptr addrspace(1) %ptra, align 4
+ %freeze = freeze <32 x i32> %a
+ store <32 x i32> %freeze, ptr addrspace(1) %ptrb, align 4
+ ret void
+}
+
+define void @freeze_i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
+; GFX10-LABEL: freeze_i32:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: global_load_dword v0, v[0:1], off
+; GFX10-NEXT:
s_waitcnt vmcnt(0) +; GFX10-NEXT: global_store_dword v[2:3], v0, off +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: freeze_i32: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: global_load_b32 v0, v[0:1], off +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: global_store_b32 v[2:3], v0, off +; GFX11-NEXT: s_setpc_b64 s[30:31] + %a = load i32, ptr addrspace(1) %ptra, align 4 + %freeze = freeze i32 %a + store i32 %freeze, ptr addrspace(1) %ptrb, align 4 + ret void +} + +define void @freeze_i64(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { +; GFX10-LABEL: freeze_i64: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: global_load_dwordx2 v[0:1], v[0:1], off +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: global_store_dwordx2 v[2:3], v[0:1], off +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: freeze_i64: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: global_load_b64 v[0:1], v[0:1], off +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: global_store_b64 v[2:3], v[0:1], off +; GFX11-NEXT: s_setpc_b64 s[30:31] + %a = load i64, ptr addrspace(1) %ptra, align 4 + %freeze = freeze i64 %a + store i64 %freeze, ptr addrspace(1) %ptrb, align 4 + ret void +} + +define void @freeze_float(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { +; GFX10-LABEL: freeze_float: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: global_load_dword v0, v[0:1], off +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: global_store_dword v[2:3], v0, off +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: freeze_float: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: global_load_b32 v0, v[0:1], off +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: global_store_b32 v[2:3], v0, off +; GFX11-NEXT: s_setpc_b64 s[30:31] + %a = load float, ptr addrspace(1) %ptra, align 4 + %freeze = freeze float %a + store float %freeze, ptr addrspace(1) %ptrb, align 4 + ret void +} + +define void @freeze_i128(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { +; GFX10-LABEL: freeze_i128: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: global_load_dwordx4 v[4:7], v[0:1], off +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: global_store_dwordx4 v[2:3], v[4:7], off +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: freeze_i128: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: global_load_b128 v[4:7], v[0:1], off +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: global_store_b128 v[2:3], v[4:7], off +; GFX11-NEXT: s_setpc_b64 s[30:31] + %a = load i128, ptr addrspace(1) %ptra, align 4 + %freeze = freeze i128 %a + store i128 %freeze, ptr addrspace(1) %ptrb, align 4 + ret void +} + +define void @freeze_i256(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { +; GFX10-SDAG-LABEL: freeze_i256: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-SDAG-NEXT: s_clause 0x1 +; GFX10-SDAG-NEXT: global_load_dwordx4 v[4:7], v[0:1], off offset:16 +; GFX10-SDAG-NEXT: global_load_dwordx4 v[8:11], v[0:1], off +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(1) +; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[4:7], off offset:16 +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX10-SDAG-NEXT: global_store_dwordx4 v[2:3], v[8:11], off +; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-GISEL-LABEL: freeze_i256: +; GFX10-GISEL: ; %bb.0: +; GFX10-GISEL-NEXT: 
s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-GISEL-NEXT: s_clause 0x1 +; GFX10-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off +; GFX10-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16 +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(1) +; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16 +; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-SDAG-LABEL: freeze_i256: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: s_clause 0x1 +; GFX11-SDAG-NEXT: global_load_b128 v[4:7], v[0:1], off offset:16 +; GFX11-SDAG-NEXT: global_load_b128 v[8:11], v[0:1], off +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(1) +; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[4:7], off offset:16 +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX11-SDAG-NEXT: global_store_b128 v[2:3], v[8:11], off +; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-GISEL-LABEL: freeze_i256: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-GISEL-NEXT: s_clause 0x1 +; GFX11-GISEL-NEXT: global_load_b128 v[4:7], v[0:1], off +; GFX11-GISEL-NEXT: global_load_b128 v[8:11], v[0:1], off offset:16 +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(1) +; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[4:7], off +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[8:11], off offset:16 +; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] + %a = load i256, ptr addrspace(1) %ptra, align 4 + %freeze = freeze i256 %a + store i256 %freeze, ptr addrspace(1) %ptrb, align 4 + ret void +} From dc2d0d5e1a4e7a7524f68aa9739acf22bee13b9e Mon Sep 17 00:00:00 2001 From: Andrei Safronov Date: Wed, 25 Sep 2024 14:02:58 +0300 Subject: [PATCH 147/212] [Xtensa] Add basic support for inline asm constraints. 
(#108986) --- llvm/lib/Target/Xtensa/XtensaAsmPrinter.cpp | 52 ++++++++++++++ llvm/lib/Target/Xtensa/XtensaAsmPrinter.h | 8 +++ llvm/lib/Target/Xtensa/XtensaISelDAGToDAG.cpp | 23 +++++++ llvm/lib/Target/Xtensa/XtensaISelLowering.cpp | 68 +++++++++++++++++++ llvm/lib/Target/Xtensa/XtensaISelLowering.h | 15 ++++ .../test/CodeGen/Xtensa/inline-asm-invalid.ll | 14 ++++ .../Xtensa/inline-asm-mem-constraint.ll | 46 +++++++++++++ llvm/test/CodeGen/Xtensa/inline-asm.ll | 40 +++++++++++ 8 files changed, 266 insertions(+) create mode 100644 llvm/test/CodeGen/Xtensa/inline-asm-invalid.ll create mode 100644 llvm/test/CodeGen/Xtensa/inline-asm-mem-constraint.ll create mode 100644 llvm/test/CodeGen/Xtensa/inline-asm.ll diff --git a/llvm/lib/Target/Xtensa/XtensaAsmPrinter.cpp b/llvm/lib/Target/Xtensa/XtensaAsmPrinter.cpp index 3f99387f759d9..db86637ecf83f 100644 --- a/llvm/lib/Target/Xtensa/XtensaAsmPrinter.cpp +++ b/llvm/lib/Target/Xtensa/XtensaAsmPrinter.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "XtensaAsmPrinter.h" +#include "MCTargetDesc/XtensaInstPrinter.h" #include "MCTargetDesc/XtensaMCExpr.h" #include "MCTargetDesc/XtensaTargetStreamer.h" #include "TargetInfo/XtensaTargetInfo.h" @@ -157,6 +158,57 @@ void XtensaAsmPrinter::emitConstantPool() { OutStreamer->popSection(); } +void XtensaAsmPrinter::printOperand(const MachineInstr *MI, int OpNo, + raw_ostream &O) { + const MachineOperand &MO = MI->getOperand(OpNo); + + switch (MO.getType()) { + case MachineOperand::MO_Register: + case MachineOperand::MO_Immediate: { + MCOperand MC = lowerOperand(MI->getOperand(OpNo)); + XtensaInstPrinter::printOperand(MC, O); + break; + } + default: + llvm_unreachable("unknown operand type"); + } +} + +bool XtensaAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, + const char *ExtraCode, raw_ostream &O) { + // Print the operand if there is no operand modifier. + if (!ExtraCode || !ExtraCode[0]) { + printOperand(MI, OpNo, O); + return false; + } + + // Fallback to the default implementation. + return AsmPrinter::PrintAsmOperand(MI, OpNo, ExtraCode, O); +} + +bool XtensaAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, + unsigned OpNo, + const char *ExtraCode, + raw_ostream &OS) { + if (ExtraCode && ExtraCode[0]) + return true; // Unknown modifier. + + assert(OpNo + 1 < MI->getNumOperands() && "Insufficient operands"); + + const MachineOperand &Base = MI->getOperand(OpNo); + const MachineOperand &Offset = MI->getOperand(OpNo + 1); + + assert(Base.isReg() && + "Unexpected base pointer for inline asm memory operand."); + assert(Offset.isImm() && "Unexpected offset for inline asm memory operand."); + + OS << XtensaInstPrinter::getRegisterName(Base.getReg()); + OS << ", "; + OS << Offset.getImm(); + + return false; +} + MCSymbol * XtensaAsmPrinter::GetConstantPoolIndexSymbol(const MachineOperand &MO) const { // Create a symbol for the name. 
diff --git a/llvm/lib/Target/Xtensa/XtensaAsmPrinter.h b/llvm/lib/Target/Xtensa/XtensaAsmPrinter.h
index f9cf5ae8c9f65..1137309cd9a45 100644
--- a/llvm/lib/Target/Xtensa/XtensaAsmPrinter.h
+++ b/llvm/lib/Target/Xtensa/XtensaAsmPrinter.h
@@ -42,6 +42,14 @@ class LLVM_LIBRARY_VISIBILITY XtensaAsmPrinter : public AsmPrinter {
 
   void emitMachineConstantPoolValue(MachineConstantPoolValue *MCPV) override;
 
+  void printOperand(const MachineInstr *MI, int opNum, raw_ostream &O);
+
+  bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+                       const char *ExtraCode, raw_ostream &O) override;
+
+  bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
+                             const char *ExtraCode, raw_ostream &OS) override;
+
   MCSymbol *GetConstantPoolIndexSymbol(const MachineOperand &MO) const;
 
   MCSymbol *GetJumpTableSymbol(const MachineOperand &MO) const;
 
diff --git a/llvm/lib/Target/Xtensa/XtensaISelDAGToDAG.cpp b/llvm/lib/Target/Xtensa/XtensaISelDAGToDAG.cpp
index 6f6d3342fcd7f..af1110487b427 100644
--- a/llvm/lib/Target/Xtensa/XtensaISelDAGToDAG.cpp
+++ b/llvm/lib/Target/Xtensa/XtensaISelDAGToDAG.cpp
@@ -33,6 +33,10 @@ class XtensaDAGToDAGISel : public SelectionDAGISel {
 
   void Select(SDNode *Node) override;
 
+  bool SelectInlineAsmMemoryOperand(const SDValue &Op,
+                                    InlineAsm::ConstraintCode ConstraintID,
+                                    std::vector<SDValue> &OutOps) override;
+
   // For load/store instructions generate (base+offset) pair from
   // memory address. The offset must be a multiple of scale argument.
   bool selectMemRegAddr(SDValue Addr, SDValue &Base, SDValue &Offset,
@@ -212,3 +216,22 @@ void XtensaDAGToDAGISel::Select(SDNode *Node) {
 
   SelectCode(Node);
 }
+
+bool XtensaDAGToDAGISel::SelectInlineAsmMemoryOperand(
+    const SDValue &Op, InlineAsm::ConstraintCode ConstraintID,
+    std::vector<SDValue> &OutOps) {
+  switch (ConstraintID) {
+  default:
+    llvm_unreachable("Unexpected asm memory constraint");
+  case InlineAsm::ConstraintCode::m: {
+    SDValue Base, Offset;
+
+    selectMemRegAddr(Op, Base, Offset, 4);
+    OutOps.push_back(Base);
+    OutOps.push_back(Offset);
+
+    return false;
+  }
+  }
+  return false;
+}
diff --git a/llvm/lib/Target/Xtensa/XtensaISelLowering.cpp b/llvm/lib/Target/Xtensa/XtensaISelLowering.cpp
index bc1360e212307..670930e99334f 100644
--- a/llvm/lib/Target/Xtensa/XtensaISelLowering.cpp
+++ b/llvm/lib/Target/Xtensa/XtensaISelLowering.cpp
@@ -142,6 +142,74 @@ bool XtensaTargetLowering::isOffsetFoldingLegal(
   return false;
 }
 
+//===----------------------------------------------------------------------===//
+// Inline asm support
+//===----------------------------------------------------------------------===//
+TargetLowering::ConstraintType
+XtensaTargetLowering::getConstraintType(StringRef Constraint) const {
+  if (Constraint.size() == 1) {
+    switch (Constraint[0]) {
+    case 'r':
+      return C_RegisterClass;
+    default:
+      break;
+    }
+  }
+  return TargetLowering::getConstraintType(Constraint);
+}
+
+TargetLowering::ConstraintWeight
+XtensaTargetLowering::getSingleConstraintMatchWeight(
+    AsmOperandInfo &Info, const char *Constraint) const {
+  ConstraintWeight Weight = CW_Invalid;
+  Value *CallOperandVal = Info.CallOperandVal;
+  // If we don't have a value, we can't do a match,
+  // but allow it at the lowest weight.
+  if (!CallOperandVal)
+    return CW_Default;
+
+  Type *Ty = CallOperandVal->getType();
+
+  // Look at the constraint type.
+  switch (*Constraint) {
+  default:
+    Weight = TargetLowering::getSingleConstraintMatchWeight(Info, Constraint);
+    break;
+  case 'r':
+    if (Ty->isIntegerTy())
+      Weight = CW_Register;
+    break;
+  }
+  return Weight;
+}
+
+std::pair<unsigned, const TargetRegisterClass *>
+XtensaTargetLowering::getRegForInlineAsmConstraint(
+    const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
+  if (Constraint.size() == 1) {
+    // GCC Constraint Letters
+    switch (Constraint[0]) {
+    default:
+      break;
+    case 'r': // General-purpose register
+      return std::make_pair(0U, &Xtensa::ARRegClass);
+    }
+  }
+  return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
+}
+
+void XtensaTargetLowering::LowerAsmOperandForConstraint(
+    SDValue Op, StringRef Constraint, std::vector<SDValue> &Ops,
+    SelectionDAG &DAG) const {
+  SDLoc DL(Op);
+
+  // Only support length 1 constraints for now.
+  if (Constraint.size() > 1)
+    return;
+
+  TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
+}
+
 //===----------------------------------------------------------------------===//
 // Calling conventions
 //===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/Xtensa/XtensaISelLowering.h b/llvm/lib/Target/Xtensa/XtensaISelLowering.h
index 2a878e45047d2..f1cd00c41437a 100644
--- a/llvm/lib/Target/Xtensa/XtensaISelLowering.h
+++ b/llvm/lib/Target/Xtensa/XtensaISelLowering.h
@@ -76,6 +76,21 @@ class XtensaTargetLowering : public TargetLowering {
 
   const char *getTargetNodeName(unsigned Opcode) const override;
 
+  std::pair<unsigned, const TargetRegisterClass *>
+  getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
+                               StringRef Constraint, MVT VT) const override;
+
+  TargetLowering::ConstraintType
+  getConstraintType(StringRef Constraint) const override;
+
+  TargetLowering::ConstraintWeight
+  getSingleConstraintMatchWeight(AsmOperandInfo &Info,
+                                 const char *Constraint) const override;
+
+  void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint,
+                                    std::vector<SDValue> &Ops,
+                                    SelectionDAG &DAG) const override;
+
   SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
 
   SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv,
diff --git a/llvm/test/CodeGen/Xtensa/inline-asm-invalid.ll b/llvm/test/CodeGen/Xtensa/inline-asm-invalid.ll
new file mode 100644
index 0000000000000..2a436dd156dd7
--- /dev/null
+++ b/llvm/test/CodeGen/Xtensa/inline-asm-invalid.ll
@@ -0,0 +1,14 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: not llc --mtriple=xtensa < %s 2>&1 | FileCheck %s
+
+define void @constraint_f() nounwind {
+; CHECK: error: unknown asm constraint 'f'
+  tail call void asm "addi a1, a1, $0", "f"(i32 1)
+  ret void
+}
+
+define i32 @register_a100(i32 %a) nounwind {
+; CHECK: error: couldn't allocate input reg for constraint '{$a100}'
+  %1 = tail call i32 asm "addi $0, $1, 1", "=r,{$a100}"(i32 %a)
+  ret i32 %1
+}
diff --git a/llvm/test/CodeGen/Xtensa/inline-asm-mem-constraint.ll b/llvm/test/CodeGen/Xtensa/inline-asm-mem-constraint.ll
new file mode 100644
index 0000000000000..4b27ba9337f88
--- /dev/null
+++ b/llvm/test/CodeGen/Xtensa/inline-asm-mem-constraint.ll
@@ -0,0 +1,46 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=xtensa < %s | FileCheck %s --check-prefix=XTENSA
+
+define i32 @m_offset_0(ptr %p) nounwind {
+; XTENSA-LABEL: m_offset_0:
+; XTENSA: #APP
+; XTENSA-NEXT: l32i a2, a2, 0
+; XTENSA-NEXT: #NO_APP
+; XTENSA-NEXT: ret
+  %1 = call i32 asm "l32i $0, $1", "=r,*m"(ptr
elementtype(i32) %p) + ret i32 %1 +} + +define i32 @m_offset_1020(ptr %p) nounwind { +; XTENSA-LABEL: m_offset_1020: +; XTENSA: #APP +; XTENSA-NEXT: l32i a2, a2, 1020 +; XTENSA-NEXT: #NO_APP +; XTENSA-NEXT: ret + %1 = getelementptr inbounds i8, ptr %p, i32 1020 + %2 = call i32 asm "l32i $0, $1", "=r,*m"(ptr elementtype(i32) %1) + ret i32 %2 +} + +define i8 @m_i8_offset_7(ptr %p) nounwind { +; XTENSA-LABEL: m_i8_offset_7: +; XTENSA: addi a8, a2, 7 +; XTENSA-NEXT: #APP +; XTENSA-NEXT: l8ui a2, a8, 0 +; XTENSA-NEXT: #NO_APP +; XTENSA-NEXT: ret + %1 = getelementptr inbounds i8, ptr %p, i32 7 + %2 = call i8 asm "l8ui $0, $1", "=r,*m"(ptr elementtype(i8) %1) + ret i8 %2 +} + +define i16 @m_i16_offset_10(ptr %p) nounwind { +; XTENSA-LABEL: m_i16_offset_10: +; XTENSA: #APP +; XTENSA-NEXT: l16si a2, a2, 20 +; XTENSA-NEXT: #NO_APP +; XTENSA-NEXT: ret + %1 = getelementptr inbounds i16, ptr %p, i32 10 + %2 = call i16 asm "l16si $0, $1", "=r,*m"(ptr elementtype(i16) %1) + ret i16 %2 +} diff --git a/llvm/test/CodeGen/Xtensa/inline-asm.ll b/llvm/test/CodeGen/Xtensa/inline-asm.ll new file mode 100644 index 0000000000000..748f5f857acfd --- /dev/null +++ b/llvm/test/CodeGen/Xtensa/inline-asm.ll @@ -0,0 +1,40 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 +; RUN: llc -mtriple=xtensa < %s \ +; RUN: | FileCheck -check-prefix=XTENSA %s + +@gi = external global i32 + +define i32 @constraint_r(i32 %a) { +; XTENSA-LABEL: constraint_r: +; XTENSA: l32r a8, .LCPI0_0 +; XTENSA-NEXT: l32i a8, a8, 0 +; XTENSA-NEXT: #APP +; XTENSA-NEXT: add a2, a2, a8 +; XTENSA-NEXT: #NO_APP +; XTENSA-NEXT: ret + %1 = load i32, ptr @gi + %2 = tail call i32 asm "add $0, $1, $2", "=r,r,r"(i32 %a, i32 %1) + ret i32 %2 +} + +define i32 @constraint_i(i32 %a) { +; XTENSA-LABEL: constraint_i: +; XTENSA: #APP +; XTENSA-NEXT: addi a2, a2, 113 +; XTENSA-NEXT: #NO_APP +; XTENSA-NEXT: ret + %1 = load i32, ptr @gi + %2 = tail call i32 asm "addi $0, $1, $2", "=r,r,i"(i32 %a, i32 113) + ret i32 %2 +} + +define i32 @explicit_register_a3(i32 %a) nounwind { +; XTENSA-LABEL: explicit_register_a3: +; XTENSA: or a3, a2, a2 +; XTENSA-NEXT: #APP +; XTENSA-NEXT: addi a2, a3, 1 +; XTENSA-NEXT: #NO_APP +; XTENSA-NEXT: ret + %1 = tail call i32 asm "addi $0, $1, 1", "=r,{a3}"(i32 %a) + ret i32 %1 +} From f5838cc17ffb1a0015a0d2687a72bf39b2847f6d Mon Sep 17 00:00:00 2001 From: Youngsuk Kim Date: Wed, 25 Sep 2024 05:40:21 -0500 Subject: [PATCH 148/212] [clang-tools-extra] Don't flush llvm::raw_string_ostream (NFC) Don't call raw_string_ostream::flush(), which is essentially a no-op. As specified in the docs, raw_string_ostream is always unbuffered. 
( 65b13610a5226b84889b923bae884ba395ad084d for further reference ) --- .../clang-tidy/bugprone/ForwardDeclarationNamespaceCheck.cpp | 1 - clang-tools-extra/clangd/AST.cpp | 5 ----- clang-tools-extra/clangd/Diagnostics.cpp | 2 -- clang-tools-extra/clangd/FindSymbols.cpp | 1 - clang-tools-extra/clangd/Hover.cpp | 4 ---- clang-tools-extra/clangd/Preamble.cpp | 1 - clang-tools-extra/clangd/Quality.cpp | 1 - clang-tools-extra/clangd/SystemIncludeExtractor.cpp | 1 - clang-tools-extra/clangd/index/StdLib.cpp | 1 - clang-tools-extra/clangd/unittests/TypeHierarchyTests.cpp | 1 - clang-tools-extra/include-cleaner/unittests/RecordTest.cpp | 1 - clang-tools-extra/modularize/Modularize.cpp | 1 - 12 files changed, 20 deletions(-) diff --git a/clang-tools-extra/clang-tidy/bugprone/ForwardDeclarationNamespaceCheck.cpp b/clang-tools-extra/clang-tidy/bugprone/ForwardDeclarationNamespaceCheck.cpp index 0b38b18208194..d77df50f8fea2 100644 --- a/clang-tools-extra/clang-tidy/bugprone/ForwardDeclarationNamespaceCheck.cpp +++ b/clang-tools-extra/clang-tidy/bugprone/ForwardDeclarationNamespaceCheck.cpp @@ -107,7 +107,6 @@ static std::string getNameOfNamespace(const CXXRecordDecl *Decl) { std::string Ns; llvm::raw_string_ostream OStream(Ns); NsDecl->printQualifiedName(OStream); - OStream.flush(); return Ns.empty() ? "(global)" : Ns; } diff --git a/clang-tools-extra/clangd/AST.cpp b/clang-tools-extra/clangd/AST.cpp index fda1e5fdf8d82..333fc10f17d7b 100644 --- a/clang-tools-extra/clangd/AST.cpp +++ b/clang-tools-extra/clangd/AST.cpp @@ -187,7 +187,6 @@ std::string printQualifiedName(const NamedDecl &ND) { // In clangd, context is usually available and paths are mostly noise. Policy.AnonymousTagLocations = false; ND.printQualifiedName(OS, Policy); - OS.flush(); assert(!StringRef(QName).starts_with("::")); return QName; } @@ -270,7 +269,6 @@ std::string printTemplateSpecializationArgs(const NamedDecl &ND) { // location information. 
printTemplateArgumentList(OS, Cls->getTemplateArgs().asArray(), Policy); } - OS.flush(); return TemplateArgs; } @@ -303,7 +301,6 @@ std::string printObjCMethod(const ObjCMethodDecl &Method) { OS << ", ..."; OS << ']'; - OS.flush(); return Name; } @@ -314,7 +311,6 @@ std::string printObjCContainer(const ObjCContainerDecl &C) { const ObjCInterfaceDecl *Class = Category->getClassInterface(); OS << getNameOrErrForObjCInterface(Class) << '(' << Category->getName() << ')'; - OS.flush(); return Name; } if (const ObjCCategoryImplDecl *CID = dyn_cast(&C)) { @@ -322,7 +318,6 @@ std::string printObjCContainer(const ObjCContainerDecl &C) { llvm::raw_string_ostream OS(Name); const ObjCInterfaceDecl *Class = CID->getClassInterface(); OS << getNameOrErrForObjCInterface(Class) << '(' << CID->getName() << ')'; - OS.flush(); return Name; } return C.getNameAsString(); diff --git a/clang-tools-extra/clangd/Diagnostics.cpp b/clang-tools-extra/clangd/Diagnostics.cpp index 552dd36b6900b..a8214acc50558 100644 --- a/clang-tools-extra/clangd/Diagnostics.cpp +++ b/clang-tools-extra/clangd/Diagnostics.cpp @@ -319,7 +319,6 @@ std::string mainMessage(const Diag &D, const ClangdDiagnosticOptions &Opts) { OS << "\n\n"; printDiag(OS, Note); } - OS.flush(); return capitalize(std::move(Result)); } @@ -335,7 +334,6 @@ std::string noteMessage(const Diag &Main, const DiagBase &Note, OS << "\n\n"; printDiag(OS, Main); } - OS.flush(); return capitalize(std::move(Result)); } diff --git a/clang-tools-extra/clangd/FindSymbols.cpp b/clang-tools-extra/clangd/FindSymbols.cpp index 55f16b7085a6f..cf2f8b62a2841 100644 --- a/clang-tools-extra/clangd/FindSymbols.cpp +++ b/clang-tools-extra/clangd/FindSymbols.cpp @@ -182,7 +182,6 @@ std::string getSymbolName(ASTContext &Ctx, const NamedDecl &ND) { OS << (Method->isInstanceMethod() ? '-' : '+'); Method->getSelector().print(OS); - OS.flush(); return Name; } return printName(Ctx, ND); diff --git a/clang-tools-extra/clangd/Hover.cpp b/clang-tools-extra/clangd/Hover.cpp index de103e011c708..298fa79e3fd0b 100644 --- a/clang-tools-extra/clangd/Hover.cpp +++ b/clang-tools-extra/clangd/Hover.cpp @@ -150,7 +150,6 @@ std::string printDefinition(const Decl *D, PrintingPolicy PP, std::string Definition; llvm::raw_string_ostream OS(Definition); D->print(OS, PP); - OS.flush(); return Definition; } @@ -179,7 +178,6 @@ HoverInfo::PrintedType printType(QualType QT, ASTContext &ASTCtx, OS << TT->getDecl()->getKindName() << " "; } QT.print(OS, PP); - OS.flush(); const Config &Cfg = Config::current(); if (!QT.isNull() && Cfg.Hover.ShowAKA) { @@ -229,7 +227,6 @@ HoverInfo::PrintedType printType(const TemplateTemplateParmDecl *TTP, // FIXME: TemplateTemplateParameter doesn't store the info on whether this // param was a "typename" or "class". 
OS << "> class"; - OS.flush(); return Result; } @@ -821,7 +818,6 @@ std::string typeAsDefinition(const HoverInfo::PrintedType &PType) { OS << PType.Type; if (PType.AKA) OS << " // aka: " << *PType.AKA; - OS.flush(); return Result; } diff --git a/clang-tools-extra/clangd/Preamble.cpp b/clang-tools-extra/clangd/Preamble.cpp index 84e8fec342829..1fe534d78daec 100644 --- a/clang-tools-extra/clangd/Preamble.cpp +++ b/clang-tools-extra/clangd/Preamble.cpp @@ -913,7 +913,6 @@ PreamblePatch PreamblePatch::create(llvm::StringRef FileName, PP.PatchedMarks = std::move(ModifiedScan->Marks); PP.PatchedMacros = std::move(ModifiedScan->Macros); dlog("Created preamble patch: {0}", Patch.str()); - Patch.flush(); return PP; } diff --git a/clang-tools-extra/clangd/Quality.cpp b/clang-tools-extra/clangd/Quality.cpp index 7371d95fbf275..c1ab63fb22f61 100644 --- a/clang-tools-extra/clangd/Quality.cpp +++ b/clang-tools-extra/clangd/Quality.cpp @@ -554,7 +554,6 @@ std::string sortText(float Score, llvm::StringRef Name) { llvm::write_hex(OS, encodeFloat(-Score), llvm::HexPrintStyle::Lower, /*Width=*/2 * sizeof(Score)); OS << Name; - OS.flush(); return S; } diff --git a/clang-tools-extra/clangd/SystemIncludeExtractor.cpp b/clang-tools-extra/clangd/SystemIncludeExtractor.cpp index d4b9b173d149d..c1c2e9fab9664 100644 --- a/clang-tools-extra/clangd/SystemIncludeExtractor.cpp +++ b/clang-tools-extra/clangd/SystemIncludeExtractor.cpp @@ -483,7 +483,6 @@ std::string convertGlobToRegex(llvm::StringRef Glob) { } } RegStream << '$'; - RegStream.flush(); return RegText; } diff --git a/clang-tools-extra/clangd/index/StdLib.cpp b/clang-tools-extra/clangd/index/StdLib.cpp index 921ab5d1c96d5..d34838a45048d 100644 --- a/clang-tools-extra/clangd/index/StdLib.cpp +++ b/clang-tools-extra/clangd/index/StdLib.cpp @@ -87,7 +87,6 @@ std::string buildUmbrella(llvm::StringLiteral Mandatory, "#endif\n", Header); } - OS.flush(); return Result; } diff --git a/clang-tools-extra/clangd/unittests/TypeHierarchyTests.cpp b/clang-tools-extra/clangd/unittests/TypeHierarchyTests.cpp index 2f82ec7444d7a..15158d8a45ca8 100644 --- a/clang-tools-extra/clangd/unittests/TypeHierarchyTests.cpp +++ b/clang-tools-extra/clangd/unittests/TypeHierarchyTests.cpp @@ -242,7 +242,6 @@ const NamedDecl &findDeclWithTemplateArgs(ParsedAST &AST, // Use getNameForDiagnostic() which includes the template // arguments in the printed name. 
 ND.getNameForDiagnostic(OS, Policy, /*Qualified=*/true);
-        OS.flush();
         return QName == Query;
       });
 }
diff --git a/clang-tools-extra/include-cleaner/unittests/RecordTest.cpp b/clang-tools-extra/include-cleaner/unittests/RecordTest.cpp
index 715d95eb57346..0b05c9190cb67 100644
--- a/clang-tools-extra/include-cleaner/unittests/RecordTest.cpp
+++ b/clang-tools-extra/include-cleaner/unittests/RecordTest.cpp
@@ -546,7 +546,6 @@ TEST_F(PragmaIncludeTest, IWYUExportBlock) {
     for (auto &FE : FEs) {
       OS << FE.getName() << " ";
     }
-    OS.flush();
     return Result;
   };
   auto Exporters = PI.getExporters(FM.getFile("private1.h").get(), FM);
diff --git a/clang-tools-extra/modularize/Modularize.cpp b/clang-tools-extra/modularize/Modularize.cpp
index 2c00c76c85533..4bb3bae0503ac 100644
--- a/clang-tools-extra/modularize/Modularize.cpp
+++ b/clang-tools-extra/modularize/Modularize.cpp
@@ -621,7 +621,6 @@ class CollectEntitiesVisitor
     std::string Name;
     llvm::raw_string_ostream OS(Name);
     ND->printQualifiedName(OS);
-    OS.flush();
     if (Name.empty())
       return true;

From b40ff5ac2d407074db4479c6e271f51c3f5db4c2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Juan=20Manuel=20Martinez=20Caama=C3=B1o?=
Date: Wed, 25 Sep 2024 13:15:23 +0200
Subject: [PATCH 149/212] [AMDGPU][StructurizeCFG] Maintain branch MD_prof
 metadata (#109813)

Currently `StructurizeCFG` drops branch_weight metadata. This metadata
can be generated from user annotations in the source code like:

```cpp
if (...) [[likely]] {
}
```
---
 llvm/lib/Transforms/Scalar/StructurizeCFG.cpp | 83 +++++++++++++++----
 .../structurizer-keep-perf-md.ll              |  8 +-
 2 files changed, 71 insertions(+), 20 deletions(-)

diff --git a/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp b/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
index aca8225cebb3f..92e47cbc7ae8b 100644
--- a/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
+++ b/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
@@ -30,6 +30,7 @@
 #include "llvm/IR/Metadata.h"
 #include "llvm/IR/PassManager.h"
 #include "llvm/IR/PatternMatch.h"
+#include "llvm/IR/ProfDataUtils.h"
 #include "llvm/IR/Type.h"
 #include "llvm/IR/Use.h"
 #include "llvm/IR/Value.h"
@@ -85,7 +86,43 @@ using PhiMap = MapVector<PHINode *, BBValueVector>;
 using BB2BBVecMap = MapVector<BasicBlock *, BBVector>;
 
 using BBPhiMap = DenseMap<BasicBlock *, PhiMap>;
-using BBPredicates = DenseMap<BasicBlock *, Value *>;
+
+using MaybeCondBranchWeights = std::optional<class CondBranchWeights>;
+
+class CondBranchWeights {
+  uint32_t TrueWeight;
+  uint32_t FalseWeight;
+
+  CondBranchWeights(uint32_t T, uint32_t F) : TrueWeight(T), FalseWeight(F) {}
+
+public:
+  static MaybeCondBranchWeights tryParse(const BranchInst &Br) {
+    assert(Br.isConditional());
+
+    uint64_t T, F;
+    if (!extractBranchWeights(Br, T, F))
+      return std::nullopt;
+
+    return CondBranchWeights(T, F);
+  }
+
+  static void setMetadata(BranchInst &Br,
+                          const MaybeCondBranchWeights &Weights) {
+    assert(Br.isConditional());
+    if (!Weights)
+      return;
+    uint32_t Arr[] = {Weights->TrueWeight, Weights->FalseWeight};
+    setBranchWeights(Br, Arr, false);
+  }
+
+  CondBranchWeights invert() const {
+    return CondBranchWeights{FalseWeight, TrueWeight};
+  }
+};
+
+using ValueWeightPair = std::pair<Value *, MaybeCondBranchWeights>;
+
+using BBPredicates = DenseMap<BasicBlock *, ValueWeightPair>;
 using PredMap = DenseMap<BasicBlock *, BBPredicates>;
 using BB2BBMap = DenseMap<BasicBlock *, BasicBlock *>;
 
@@ -271,7 +308,7 @@ class StructurizeCFG {
 
   void analyzeLoops(RegionNode *N);
 
-  Value *buildCondition(BranchInst *Term, unsigned Idx, bool Invert);
+  ValueWeightPair buildCondition(BranchInst *Term, unsigned Idx, bool Invert);
 
   void gatherPredicates(RegionNode *N);
 
@@ -449,16 +486,22 @@ void StructurizeCFG::analyzeLoops(RegionNode *N) {
 }
 
 /// Build the condition for one edge
-Value *StructurizeCFG::buildCondition(BranchInst *Term, unsigned Idx,
-                                      bool Invert) {
+ValueWeightPair StructurizeCFG::buildCondition(BranchInst *Term, unsigned Idx,
+                                               bool Invert) {
   Value *Cond = Invert ? BoolFalse : BoolTrue;
+  MaybeCondBranchWeights Weights;
+
   if (Term->isConditional()) {
     Cond = Term->getCondition();
+    Weights = CondBranchWeights::tryParse(*Term);
 
-    if (Idx != (unsigned)Invert)
+    if (Idx != (unsigned)Invert) {
       Cond = invertCondition(Cond);
+      if (Weights)
+        Weights = Weights->invert();
+    }
   }
-  return Cond;
+  return {Cond, Weights};
 }
 
 /// Analyze the predecessors of each block and build up predicates
@@ -490,8 +533,8 @@ void StructurizeCFG::gatherPredicates(RegionNode *N) {
 
         if (Visited.count(Other) && !Loops.count(Other) &&
             !Pred.count(Other) && !Pred.count(P)) {
-          Pred[Other] = BoolFalse;
-          Pred[P] = BoolTrue;
+          Pred[Other] = {BoolFalse, std::nullopt};
+          Pred[P] = {BoolTrue, std::nullopt};
           continue;
         }
       }
@@ -512,9 +555,9 @@ void StructurizeCFG::gatherPredicates(RegionNode *N) {
 
       BasicBlock *Entry = R->getEntry();
       if (Visited.count(Entry))
-        Pred[Entry] = BoolTrue;
+        Pred[Entry] = {BoolTrue, std::nullopt};
       else
-        LPred[Entry] = BoolFalse;
+        LPred[Entry] = {BoolFalse, std::nullopt};
     }
   }
 }
@@ -578,12 +621,14 @@ void StructurizeCFG::insertConditions(bool Loops) {
     Dominator.addBlock(Parent);
 
     Value *ParentValue = nullptr;
-    for (std::pair<BasicBlock *, Value *> BBAndPred : Preds) {
+    MaybeCondBranchWeights ParentWeights = std::nullopt;
+    for (std::pair<BasicBlock *, ValueWeightPair> BBAndPred : Preds) {
       BasicBlock *BB = BBAndPred.first;
-      Value *Pred = BBAndPred.second;
+      auto [Pred, Weight] = BBAndPred.second;
 
       if (BB == Parent) {
         ParentValue = Pred;
+        ParentWeights = Weight;
         break;
       }
       PhiInserter.AddAvailableValue(BB, Pred);
@@ -592,6 +637,7 @@ void StructurizeCFG::insertConditions(bool Loops) {
 
     if (ParentValue) {
       Term->setCondition(ParentValue);
+      CondBranchWeights::setMetadata(*Term, ParentWeights);
     } else {
       if (!Dominator.resultIsRememberedBlock())
         PhiInserter.AddAvailableValue(Dominator.result(), Default);
@@ -607,7 +653,7 @@ void StructurizeCFG::simplifyConditions() {
   for (auto &I : concat<PredMap::value_type>(Predicates, LoopPreds)) {
     auto &Preds = I.second;
     for (auto &J : Preds) {
-      auto &Cond = J.second;
+      Value *Cond = J.second.first;
       Instruction *Inverted;
       if (match(Cond, m_Not(m_OneUse(m_Instruction(Inverted)))) &&
           !Cond->use_empty()) {
@@ -904,9 +950,10 @@ void StructurizeCFG::setPrevNode(BasicBlock *BB) {
 /// Does BB dominate all the predicates of Node?
 bool StructurizeCFG::dominatesPredicates(BasicBlock *BB, RegionNode *Node) {
   BBPredicates &Preds = Predicates[Node->getEntry()];
-  return llvm::all_of(Preds, [&](std::pair<BasicBlock *, Value *> Pred) {
-    return DT->dominates(BB, Pred.first);
-  });
+  return llvm::all_of(Preds,
+                      [&](std::pair<BasicBlock *, ValueWeightPair> Pred) {
+                        return DT->dominates(BB, Pred.first);
+                      });
 }
 
 /// Can we predict that this node will always be called?
@@ -918,9 +965,9 @@ bool StructurizeCFG::isPredictableTrue(RegionNode *Node) {
   if (!PrevNode)
     return true;
 
-  for (std::pair<BasicBlock *, Value *> Pred : Preds) {
+  for (std::pair<BasicBlock *, ValueWeightPair> Pred : Preds) {
     BasicBlock *BB = Pred.first;
-    Value *V = Pred.second;
+    Value *V = Pred.second.first;
 
     if (V != BoolTrue)
       return false;
diff --git a/llvm/test/Transforms/StructurizeCFG/structurizer-keep-perf-md.ll b/llvm/test/Transforms/StructurizeCFG/structurizer-keep-perf-md.ll
index 862c50c6183f1..cdf5ca569701b 100644
--- a/llvm/test/Transforms/StructurizeCFG/structurizer-keep-perf-md.ll
+++ b/llvm/test/Transforms/StructurizeCFG/structurizer-keep-perf-md.ll
@@ -5,7 +5,7 @@ define amdgpu_ps i32 @if_else(i32 %0) {
 ; OPT-LABEL: define amdgpu_ps i32 @if_else(
 ; OPT-SAME: i32 [[TMP0:%.*]]) {
 ; OPT-NEXT: [[C:%.*]] = icmp ne i32 [[TMP0]], 0
-; OPT-NEXT: br i1 [[C]], label %[[FALSE:.*]], label %[[FLOW:.*]]
+; OPT-NEXT: br i1 [[C]], label %[[FALSE:.*]], label %[[FLOW:.*]], !prof [[PROF0:![0-9]+]]
 ; OPT: [[FLOW]]:
 ; OPT-NEXT: [[TMP2:%.*]] = phi i32 [ 33, %[[FALSE]] ], [ undef, [[TMP1:%.*]] ]
 ; OPT-NEXT: [[TMP3:%.*]] = phi i1 [ false, %[[FALSE]] ], [ true, [[TMP1]] ]
@@ -40,7 +40,7 @@ define amdgpu_ps void @loop_if_break(i32 %n) {
 ; OPT: [[LOOP]]:
 ; OPT-NEXT: [[I:%.*]] = phi i32 [ [[N]], %[[ENTRY]] ], [ [[TMP0:%.*]], %[[FLOW:.*]] ]
 ; OPT-NEXT: [[C:%.*]] = icmp ugt i32 [[I]], 0
-; OPT-NEXT: br i1 [[C]], label %[[LOOP_BODY:.*]], label %[[FLOW]]
+; OPT-NEXT: br i1 [[C]], label %[[LOOP_BODY:.*]], label %[[FLOW]], !prof [[PROF1:![0-9]+]]
 ; OPT: [[LOOP_BODY]]:
 ; OPT-NEXT: [[I_NEXT:%.*]] = sub i32 [[I]], 1
 ; OPT-NEXT: br label %[[FLOW]]
@@ -70,3 +70,7 @@ exit: ; preds = %loop
 
 attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
 
 !0 = !{!"branch_weights", i32 1000, i32 1}
+;.
+; OPT: [[PROF0]] = !{!"branch_weights", i32 1, i32 1000}
+; OPT: [[PROF1]] = !{!"branch_weights", i32 1000, i32 1}
+;.

From 9583215d55b639f9fc28400b560f9e66c13db13a Mon Sep 17 00:00:00 2001
From: Luke Lau
Date: Wed, 25 Sep 2024 19:38:53 +0800
Subject: [PATCH 150/212] [RISCV] Add splat tests for zvfbfmin and without
 zfhmin/zfbfmin. NFC

This exercises the lowering path when the scalar type isn't legal.
--- llvm/test/CodeGen/RISCV/rvv/vsplats-fp.ll | 91 +++++++++++++++-------- 1 file changed, 61 insertions(+), 30 deletions(-) diff --git a/llvm/test/CodeGen/RISCV/rvv/vsplats-fp.ll b/llvm/test/CodeGen/RISCV/rvv/vsplats-fp.ll index 8317690e3fd25..c29c2533b8499 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vsplats-fp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vsplats-fp.ll @@ -1,20 +1,38 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+f,+d,+zfh,+zvfh,+v,+optimized-zero-stride-load -target-abi ilp32d -verify-machineinstrs < %s \ -; RUN: | FileCheck %s --check-prefixes=CHECK,ZVFH,OPTIMIZED -; RUN: llc -mtriple=riscv64 -mattr=+f,+d,+zfh,+zvfh,+v,+optimized-zero-stride-load -target-abi lp64d -verify-machineinstrs < %s \ -; RUN: | FileCheck %s --check-prefixes=CHECK,ZVFH,OPTIMIZED -; RUN: llc -mtriple=riscv32 -mattr=+f,+d,+zfh,+zvfh,+v -target-abi ilp32d -verify-machineinstrs < %s \ -; RUN: | FileCheck %s --check-prefixes=CHECK,ZVFH,NOT-OPTIMIZED -; RUN: llc -mtriple=riscv64 -mattr=+f,+d,+zfh,+zvfh,+v -target-abi lp64d -verify-machineinstrs < %s \ -; RUN: | FileCheck %s --check-prefixes=CHECK,ZVFH,NOT-OPTIMIZED -; RUN: llc -mtriple=riscv32 -mattr=+f,+d,+zfh,+zvfhmin,+v,+optimized-zero-stride-load -target-abi ilp32d -verify-machineinstrs < %s \ -; RUN: | FileCheck %s --check-prefixes=CHECK,ZVFHMIN,OPTIMIZED -; RUN: llc -mtriple=riscv64 -mattr=+f,+d,+zfh,+zvfhmin,+v,+optimized-zero-stride-load -target-abi lp64d -verify-machineinstrs < %s \ -; RUN: | FileCheck %s --check-prefixes=CHECK,ZVFHMIN,OPTIMIZED -; RUN: llc -mtriple=riscv32 -mattr=+f,+d,+zfh,+zvfhmin,+v -target-abi ilp32d -verify-machineinstrs < %s \ -; RUN: | FileCheck %s --check-prefixes=CHECK,ZVFHMIN,NOT-OPTIMIZED -; RUN: llc -mtriple=riscv64 -mattr=+f,+d,+zfh,+zvfhmin,+v -target-abi lp64d -verify-machineinstrs < %s \ -; RUN: | FileCheck %s --check-prefixes=CHECK,ZVFHMIN,NOT-OPTIMIZED +; RUN: llc -mtriple=riscv32 -mattr=+v,+zvfh,+zvfbfmin -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,NOZFMIN,ZVFH +; RUN: llc -mtriple=riscv64 -mattr=+v,+zvfh,+zvfbfmin -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,NOZFMIN,ZVFH +; RUN: llc -mtriple=riscv32 -mattr=+v,+zvfhmin,+zvfbfmin -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,NOZFMIN,ZVFHMIN +; RUN: llc -mtriple=riscv64 -mattr=+v,+zvfhmin,+zvfbfmin -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,NOZFMIN,ZVFHMIN +; RUN: llc -mtriple=riscv32 -mattr=+v,+zfhmin,+zfbfmin,+zvfhmin,+zvfbfmin -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZFMIN +; RUN: llc -mtriple=riscv64 -mattr=+v,+zfhmin,+zfbfmin,+zvfhmin,+zvfbfmin -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZFMIN + +define @vsplat_nxv8bf16(bfloat %f) { +; NOZFMIN-LABEL: vsplat_nxv8bf16: +; NOZFMIN: # %bb.0: +; NOZFMIN-NEXT: fmv.x.w a0, fa0 +; NOZFMIN-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; NOZFMIN-NEXT: vmv.v.x v8, a0 +; NOZFMIN-NEXT: ret +; +; ZFMIN-LABEL: vsplat_nxv8bf16: +; ZFMIN: # %bb.0: +; ZFMIN-NEXT: fmv.x.h a0, fa0 +; ZFMIN-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; ZFMIN-NEXT: vmv.v.x v8, a0 +; ZFMIN-NEXT: ret + %head = insertelement poison, bfloat %f, i32 0 + %splat = shufflevector %head, poison, zeroinitializer + ret %splat +} + +define @vsplat_zero_nxv8bf16() { +; CHECK-LABEL: vsplat_zero_nxv8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; CHECK-NEXT: vmv.v.i v8, 0 +; CHECK-NEXT: ret + ret splat (bfloat zeroinitializer) +} define 
@vsplat_nxv8f16(half %f) { ; ZVFH-LABEL: vsplat_nxv8f16: @@ -25,10 +43,17 @@ define @vsplat_nxv8f16(half %f) { ; ; ZVFHMIN-LABEL: vsplat_nxv8f16: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: fmv.x.h a0, fa0 +; ZVFHMIN-NEXT: fmv.x.w a0, fa0 ; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m2, ta, ma ; ZVFHMIN-NEXT: vmv.v.x v8, a0 ; ZVFHMIN-NEXT: ret +; +; ZFMIN-LABEL: vsplat_nxv8f16: +; ZFMIN: # %bb.0: +; ZFMIN-NEXT: fmv.x.h a0, fa0 +; ZFMIN-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; ZFMIN-NEXT: vmv.v.x v8, a0 +; ZFMIN-NEXT: ret %head = insertelement poison, half %f, i32 0 %splat = shufflevector %head, poison, zeroinitializer ret %splat @@ -83,20 +108,26 @@ define @vsplat_zero_nxv8f64() { ret splat (double zeroinitializer) } -; Test that we fold this to a vlse with 0 stride. define @vsplat_load_nxv8f32(ptr %ptr) { -; OPTIMIZED-LABEL: vsplat_load_nxv8f32: -; OPTIMIZED: # %bb.0: -; OPTIMIZED-NEXT: vsetvli a1, zero, e32, m4, ta, ma -; OPTIMIZED-NEXT: vlse32.v v8, (a0), zero -; OPTIMIZED-NEXT: ret -; -; NOT-OPTIMIZED-LABEL: vsplat_load_nxv8f32: -; NOT-OPTIMIZED: # %bb.0: -; NOT-OPTIMIZED-NEXT: flw fa5, 0(a0) -; NOT-OPTIMIZED-NEXT: vsetvli a0, zero, e32, m4, ta, ma -; NOT-OPTIMIZED-NEXT: vfmv.v.f v8, fa5 -; NOT-OPTIMIZED-NEXT: ret +; CHECK-LABEL: vsplat_load_nxv8f32: +; CHECK: # %bb.0: +; CHECK-NEXT: flw fa5, 0(a0) +; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma +; CHECK-NEXT: vfmv.v.f v8, fa5 +; CHECK-NEXT: ret + %f = load float, ptr %ptr + %head = insertelement poison, float %f, i32 0 + %splat = shufflevector %head, poison, zeroinitializer + ret %splat +} + +; Test that we fold this to a vlse with 0 stride. +define @vsplat_load_nxv8f32_optimized(ptr %ptr) "target-features"="+optimized-zero-stride-load" { +; CHECK-LABEL: vsplat_load_nxv8f32_optimized: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e32, m4, ta, ma +; CHECK-NEXT: vlse32.v v8, (a0), zero +; CHECK-NEXT: ret %f = load float, ptr %ptr %head = insertelement poison, float %f, i32 0 %splat = shufflevector %head, poison, zeroinitializer From 2a29d24ba94dac82e838c694595a0a574e505aab Mon Sep 17 00:00:00 2001 From: Victor Campos Date: Wed, 25 Sep 2024 12:43:41 +0100 Subject: [PATCH 151/212] [ADT] Use perfect forwarding in SmallSet::insert() (#108590) Previously this method took arguments by const-ref. This patch changes the implementation to take perfectly forwarded arguments in the form of a universal reference. Now, the insertion method will take advantage of arguments passed as rvalue, potentially leading to performance improvements. --- llvm/include/llvm/ADT/SmallSet.h | 46 +++++++++++++++++------------ llvm/unittests/ADT/SmallSetTest.cpp | 34 +++++++++++++++++++++ 2 files changed, 61 insertions(+), 19 deletions(-) diff --git a/llvm/include/llvm/ADT/SmallSet.h b/llvm/include/llvm/ADT/SmallSet.h index 8d7511bf0bc8d..56259ea7cf9d0 100644 --- a/llvm/include/llvm/ADT/SmallSet.h +++ b/llvm/include/llvm/ADT/SmallSet.h @@ -161,26 +161,10 @@ class SmallSet { /// Returns a pair. The first value of it is an iterator to the inserted /// element or the existing element in the set. The second value is true /// if the element is inserted (it was not in the set before). - std::pair insert(const T &V) { - if (!isSmall()) { - auto [I, Inserted] = Set.insert(V); - return std::make_pair(const_iterator(I), Inserted); - } - - auto I = std::find(Vector.begin(), Vector.end(), V); - if (I != Vector.end()) // Don't reinsert if it already exists. 
-      return std::make_pair(const_iterator(I), false);
-    if (Vector.size() < N) {
-      Vector.push_back(V);
-      return std::make_pair(const_iterator(std::prev(Vector.end())), true);
-    }
+  std::pair<const_iterator, bool> insert(const T &V) { return insertImpl(V); }
 
-    // Otherwise, grow from vector to set.
-    while (!Vector.empty()) {
-      Set.insert(Vector.back());
-      Vector.pop_back();
-    }
-    return std::make_pair(const_iterator(Set.insert(V).first), true);
+  std::pair<const_iterator, bool> insert(T &&V) {
+    return insertImpl(std::move(V));
   }
 
   template <typename IterT>
@@ -226,6 +210,30 @@ class SmallSet {
 
 private:
   bool isSmall() const { return Set.empty(); }
+
+  template <typename ArgType>
+  std::pair<const_iterator, bool> insertImpl(ArgType &&V) {
+    static_assert(std::is_convertible_v<ArgType, T>,
+                  "ArgType must be convertible to T!");
+    if (!isSmall()) {
+      auto [I, Inserted] = Set.insert(std::forward<ArgType>(V));
+      return {const_iterator(I), Inserted};
+    }
+
+    auto I = std::find(Vector.begin(), Vector.end(), V);
+    if (I != Vector.end()) // Don't reinsert if it already exists.
+      return {const_iterator(I), false};
+    if (Vector.size() < N) {
+      Vector.push_back(std::forward<ArgType>(V));
+      return {const_iterator(std::prev(Vector.end())), true};
+    }
+
+    // Otherwise, grow from vector to set.
+    Set.insert(std::make_move_iterator(Vector.begin()),
+               std::make_move_iterator(Vector.end()));
+    Vector.clear();
+    return {const_iterator(Set.insert(std::forward<ArgType>(V)).first), true};
+  }
 };
 
 /// If this set is of pointer values, transparently switch over to using
diff --git a/llvm/unittests/ADT/SmallSetTest.cpp b/llvm/unittests/ADT/SmallSetTest.cpp
index b50b368ae6636..0fb20b19df925 100644
--- a/llvm/unittests/ADT/SmallSetTest.cpp
+++ b/llvm/unittests/ADT/SmallSetTest.cpp
@@ -41,6 +41,40 @@ TEST(SmallSetTest, Insert) {
   EXPECT_EQ(0u, s1.count(4));
 }
 
+TEST(SmallSetTest, InsertPerfectFwd) {
+  struct Value {
+    int Key;
+    bool Moved;
+
+    Value(int Key) : Key(Key), Moved(false) {}
+    Value(const Value &) = default;
+    Value(Value &&Other) : Key(Other.Key), Moved(false) { Other.Moved = true; }
+    bool operator==(const Value &Other) const { return Key == Other.Key; }
+    bool operator<(const Value &Other) const { return Key < Other.Key; }
+  };
+
+  {
+    SmallSet<Value, 4> S;
+    Value V1(1), V2(2);
+
+    S.insert(V1);
+    EXPECT_EQ(V1.Moved, false);
+
+    S.insert(std::move(V2));
+    EXPECT_EQ(V2.Moved, true);
+  }
+  {
+    SmallSet<Value, 1> S;
+    Value V1(1), V2(2);
+
+    S.insert(V1);
+    EXPECT_EQ(V1.Moved, false);
+
+    S.insert(std::move(V2));
+    EXPECT_EQ(V2.Moved, true);
+  }
+}
+
 TEST(SmallSetTest, Grow) {
   SmallSet<int, 4> s1;

From 786dc5a2da9bb55d98c65d018de25d9bd31485ff Mon Sep 17 00:00:00 2001
From: Adrian Vogelsgesang
Date: Wed, 25 Sep 2024 13:49:42 +0200
Subject: [PATCH 152/212] [lldb-dap] Simplify `readMemory` (#109485)

The `readMemory` request used the `MemoryRegionInfo` so it could also
support short reads. Since #106532, this is no longer necessary, as
mentioned by @labath in a comment on #104317.

With this commit, we no longer set the `unreadableBytes` in the result.
But this is optional, anyway, according to the spec, and afaik the
VS Code UI does not make good use of `unreadableBytes`.

We prefer `SBTarget::ReadMemory` over `SBProcess::ReadMemory`, because
the `memory read` command also reads memory through the target instead
of the process, and because users would expect the UI view and the
results from memory read to be in-sync.
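To illustrate the target-based read, here is a rough sketch (not part
of the patch; `ReadMemoryViaTarget` is a hypothetical helper name):

```cpp
#include <algorithm>
#include <cstdint>
#include <vector>

#include "lldb/API/LLDB.h"

// Sketch: read up to `count` bytes at `addr_int` through the target,
// tolerating partial (short) reads instead of treating them as errors.
static std::vector<uint8_t> ReadMemoryViaTarget(lldb::SBTarget &target,
                                                lldb::addr_t addr_int,
                                                uint64_t count,
                                                lldb::SBError &error) {
  // Read at least one byte so that a zero-sized request still checks
  // whether the memory reference can be dereferenced.
  std::vector<uint8_t> buf(std::max<uint64_t>(count, 1));
  lldb::SBAddress addr(addr_int, target);
  size_t n = target.ReadMemory(addr, buf.data(), buf.size(), error);
  buf.resize(std::min<uint64_t>(n, count)); // keep only what was read
  return buf;
}
```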
--- .../tools/lldb-dap/memory/TestDAP_memory.py | 8 ++- lldb/tools/lldb-dap/lldb-dap.cpp | 69 +++++-------------- 2 files changed, 23 insertions(+), 54 deletions(-) diff --git a/lldb/test/API/tools/lldb-dap/memory/TestDAP_memory.py b/lldb/test/API/tools/lldb-dap/memory/TestDAP_memory.py index 1082541aebcf7..ea43fccf016a7 100644 --- a/lldb/test/API/tools/lldb-dap/memory/TestDAP_memory.py +++ b/lldb/test/API/tools/lldb-dap/memory/TestDAP_memory.py @@ -93,15 +93,18 @@ def test_readMemory(self): # We can read the complete string mem = self.dap_server.request_readMemory(memref, 0, 5)["body"] - self.assertEqual(mem["unreadableBytes"], 0) self.assertEqual(b64decode(mem["data"]), b"dead\0") + # We can read large chunks, potentially returning partial results + mem = self.dap_server.request_readMemory(memref, 0, 4096)["body"] + self.assertEqual(b64decode(mem["data"])[0:5], b"dead\0") + # Use an offset mem = self.dap_server.request_readMemory(memref, 2, 3)["body"] self.assertEqual(b64decode(mem["data"]), b"ad\0") # Reads of size 0 are successful - # VS-Code sends those in order to check if a `memoryReference` can actually be dereferenced. + # VS Code sends those in order to check if a `memoryReference` can actually be dereferenced. mem = self.dap_server.request_readMemory(memref, 0, 0) self.assertEqual(mem["success"], True) self.assertEqual(mem["body"]["data"], "") @@ -109,4 +112,3 @@ def test_readMemory(self): # Reads at offset 0x0 fail mem = self.dap_server.request_readMemory("0x0", 0, 6) self.assertEqual(mem["success"], False) - self.assertEqual(mem["message"], "Memory region is not readable") diff --git a/lldb/tools/lldb-dap/lldb-dap.cpp b/lldb/tools/lldb-dap/lldb-dap.cpp index c7653fed2def4..f692d77347038 100644 --- a/lldb/tools/lldb-dap/lldb-dap.cpp +++ b/lldb/tools/lldb-dap/lldb-dap.cpp @@ -4422,14 +4422,6 @@ void request_readMemory(const llvm::json::Object &request) { FillResponse(request, response); auto *arguments = request.getObject("arguments"); - lldb::SBProcess process = g_dap.target.GetProcess(); - if (!process.IsValid()) { - response["success"] = false; - response["message"] = "No process running"; - g_dap.SendJSON(llvm::json::Value(std::move(response))); - return; - } - llvm::StringRef memoryReference = GetString(arguments, "memoryReference"); auto addr_opt = DecodeMemoryReference(memoryReference); if (!addr_opt.has_value()) { @@ -4439,57 +4431,32 @@ void request_readMemory(const llvm::json::Object &request) { g_dap.SendJSON(llvm::json::Value(std::move(response))); return; } - lldb::addr_t addr = *addr_opt; - - addr += GetSigned(arguments, "offset", 0); - const uint64_t requested_count = GetUnsigned(arguments, "count", 0); - lldb::SBMemoryRegionInfo region_info; - lldb::SBError memreg_error = process.GetMemoryRegionInfo(addr, region_info); - if (memreg_error.Fail()) { - response["success"] = false; - EmplaceSafeString(response, "message", - "Unable to find memory region: " + - std::string(memreg_error.GetCString())); - g_dap.SendJSON(llvm::json::Value(std::move(response))); - return; - } - if (!region_info.IsReadable()) { + lldb::addr_t addr_int = *addr_opt; + addr_int += GetSigned(arguments, "offset", 0); + const uint64_t count_requested = GetUnsigned(arguments, "count", 0); + + // We also need support reading 0 bytes + // VS Code sends those requests to check if a `memoryReference` + // can be dereferenced. 
+  const uint64_t count_read = std::max<uint64_t>(count_requested, 1);
+  std::vector<uint8_t> buf;
+  buf.resize(count_read);
+  lldb::SBError error;
+  lldb::SBAddress addr{addr_int, g_dap.target};
+  size_t count_result =
+      g_dap.target.ReadMemory(addr, buf.data(), count_read, error);
+  if (count_result == 0) {
     response["success"] = false;
-    response.try_emplace("message", "Memory region is not readable");
+    EmplaceSafeString(response, "message", error.GetCString());
     g_dap.SendJSON(llvm::json::Value(std::move(response)));
     return;
   }
-  const uint64_t available_count =
-      std::min<uint64_t>(requested_count, region_info.GetRegionEnd() - addr);
-  const uint64_t unavailable_count = requested_count - available_count;
-
-  std::vector<uint8_t> buf;
-  buf.resize(available_count);
-  if (available_count > 0) {
-    lldb::SBError memread_error;
-    uint64_t bytes_read =
-        process.ReadMemory(addr, buf.data(), available_count, memread_error);
-    if (memread_error.Fail()) {
-      response["success"] = false;
-      EmplaceSafeString(response, "message",
-                        "Unable to read memory: " +
-                            std::string(memread_error.GetCString()));
-      g_dap.SendJSON(llvm::json::Value(std::move(response)));
-      return;
-    }
-    if (bytes_read != available_count) {
-      response["success"] = false;
-      EmplaceSafeString(response, "message", "Unexpected, short read");
-      g_dap.SendJSON(llvm::json::Value(std::move(response)));
-      return;
-    }
-  }
+  buf.resize(std::min<uint64_t>(count_result, count_requested));
 
   llvm::json::Object body;
-  std::string formatted_addr = "0x" + llvm::utohexstr(addr);
+  std::string formatted_addr = "0x" + llvm::utohexstr(addr_int);
   body.try_emplace("address", formatted_addr);
   body.try_emplace("data", llvm::encodeBase64(buf));
-  body.try_emplace("unreadableBytes", unavailable_count);
   response.try_emplace("body", std::move(body));
   g_dap.SendJSON(llvm::json::Value(std::move(response)));
 }

From 1c984b86b389bbc71c8c2988d1d707e2f32878bd Mon Sep 17 00:00:00 2001
From: Rahul Joshi
Date: Wed, 25 Sep 2024 04:50:09 -0700
Subject: [PATCH 153/212] [LLVM][TableGen] Adopt !listflatten for Intrinsic
 type signature (#109884)

The intrinsic type signature is a `list<list<int>>` that holds the IIT
encoding for each param/ret type (outer list), where the IIT encoding
for each type itself can be 0 or more integers (the inner list).

The intrinsic emitter flattens this list to generate the type signature
in `ComputeTypeSignature`. Use the new !listflatten() operator to
instead flatten the list in the TableGen definition and eliminate the
flattening in the emitter code.

Verified that `-gen-intrinsic-impl` output for Intrinsics.td is
identical with and without the change.
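As a side note for readers unfamiliar with the operator: `!listflatten`
concatenates the inner lists of a list-of-lists, one level deep. A rough
C++ analogue (illustration only, with made-up values; not part of the
patch):

```cpp
#include <cassert>
#include <vector>

// One-level flatten, analogous to what !listflatten does for a
// TableGen list<list<int>>.
std::vector<int> flatten(const std::vector<std::vector<int>> &nested) {
  std::vector<int> flat;
  for (const auto &inner : nested)
    flat.insert(flat.end(), inner.begin(), inner.end());
  return flat;
}

int main() {
  assert((flatten({{1, 2}, {}, {3}}) == std::vector<int>{1, 2, 3}));
}
```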
---
 llvm/include/llvm/IR/Intrinsics.td       | 4 ++--
 llvm/utils/TableGen/IntrinsicEmitter.cpp | 8 +++-----
 2 files changed, 5 insertions(+), 7 deletions(-)

diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td
index 48d57907e6d0b..079ac61adef6e 100644
--- a/llvm/include/llvm/IR/Intrinsics.td
+++ b/llvm/include/llvm/IR/Intrinsics.td
@@ -626,7 +626,7 @@ class TypeInfoGen<
   list<LLVMType> Types = !foreach(ty, AllTypes,
     !if(!isa<LLVMMatchType>(ty), ACTys[MappingRIdxs[ty.Number]], ty));
 
-  list<list<int>> TypeSig = !listconcat(
+  list<int> TypeSig = !listflatten(!listconcat(
       [IIT_RetNumbers[!size(RetTypes)]],
       !foreach(i, !range(AllTypes),
                !foreach(a, AllTypes[i].Sig,
@@ -634,7 +634,7 @@ class TypeInfoGen<
                         MappingRIdxs,
                         ArgCodes,
                         ACIdxs[i],
-                        a>.ret)));
+                        a>.ret))));
 }
 
 //===----------------------------------------------------------------------===//
diff --git a/llvm/utils/TableGen/IntrinsicEmitter.cpp b/llvm/utils/TableGen/IntrinsicEmitter.cpp
index 51c2e9a12e00c..efa067e60de43 100644
--- a/llvm/utils/TableGen/IntrinsicEmitter.cpp
+++ b/llvm/utils/TableGen/IntrinsicEmitter.cpp
@@ -276,12 +276,10 @@ using TypeSigTy = SmallVector<unsigned char, 8>;
 static TypeSigTy ComputeTypeSignature(const CodeGenIntrinsic &Int) {
   TypeSigTy TypeSig;
   const Record *TypeInfo = Int.TheDef->getValueAsDef("TypeInfo");
-  const ListInit *OuterList = TypeInfo->getValueAsListInit("TypeSig");
+  const ListInit *TypeList = TypeInfo->getValueAsListInit("TypeSig");
 
-  for (const auto *Outer : OuterList->getValues()) {
-    for (const auto *Inner : cast<ListInit>(Outer)->getValues())
-      TypeSig.emplace_back(cast<IntInit>(Inner)->getValue());
-  }
+  for (const auto *TypeListEntry : TypeList->getValues())
+    TypeSig.emplace_back(cast<IntInit>(TypeListEntry)->getValue());
   return TypeSig;
 }

From fe06a6daae6be85d47cd1e51654e91f9ac6e63d7 Mon Sep 17 00:00:00 2001
From: Haojian Wu
Date: Wed, 25 Sep 2024 14:12:49 +0200
Subject: [PATCH 154/212] Reland: [clang] Diagnose dangling issues for the "Container" case. #107213 (#108344)

This relands #107213, with fixes to address false positives
(`make_optional(nullptr)`).
---
 clang/docs/ReleaseNotes.rst                |   2 +
 clang/include/clang/Basic/AttrDocs.td      |  14 ++
 clang/lib/Sema/CheckExprLifetime.cpp       | 151 +++++++++++++-
 .../Sema/warn-lifetime-analysis-nocfg.cpp  | 190 ++++++++++++++++++
 4 files changed, 351 insertions(+), 6 deletions(-)

diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 5923888383022..14907e7db18de 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -336,6 +336,8 @@ Improvements to Clang's diagnostics
   local variables passed to function calls using the ``[[clang::musttail]]``
   attribute.
 
+- Clang now diagnoses cases where a dangling ``GSLOwner<Pointer>`` object is constructed, e.g. ``std::vector<std::string_view> v = {std::string()};`` (#GH100526).
+
 Improvements to Clang's time-trace
 ----------------------------------
 
diff --git a/clang/include/clang/Basic/AttrDocs.td b/clang/include/clang/Basic/AttrDocs.td
index f23a148e546fa..53d88482698f0 100644
--- a/clang/include/clang/Basic/AttrDocs.td
+++ b/clang/include/clang/Basic/AttrDocs.td
@@ -6696,6 +6696,20 @@ When the Owner's lifetime ends, it will consider the Pointer to be dangling.
     P.getInt(); // P is dangling
   }
 
+If a template class is annotated with ``[[gsl::Owner]]``, and the first
+instantiated template argument is a pointer type (raw pointer, or ``[[gsl::Pointer]]``),
+the analysis will consider the instantiated class as a container of the pointer.
+When constructing such an object from a GSL owner object, the analysis will +assume that the container holds a pointer to the owner object. Consequently, +when the owner object is destroyed, the pointer will be considered dangling. + +.. code-block:: c++ + + int f() { + std::vector v = {std::string()}; // v holds a dangling pointer. + std::optional o = std::string(); // o holds a dangling pointer. + } + }]; } diff --git a/clang/lib/Sema/CheckExprLifetime.cpp b/clang/lib/Sema/CheckExprLifetime.cpp index e9e39c11ffbaa..009b8d000e6b0 100644 --- a/clang/lib/Sema/CheckExprLifetime.cpp +++ b/clang/lib/Sema/CheckExprLifetime.cpp @@ -271,6 +271,49 @@ static bool isInStlNamespace(const Decl *D) { return DC->isStdNamespace(); } +static bool isPointerLikeType(QualType Type) { + return isRecordWithAttr(Type) || Type->isPointerType() || + Type->isNullPtrType(); +} + +// Returns true if the given Record decl is a form of `GSLOwner` +// type, e.g. std::vector, std::optional. +static bool isContainerOfPointer(const RecordDecl *Container) { + if (const auto *CTSD = + dyn_cast_if_present(Container)) { + if (!CTSD->hasAttr()) // Container must be a GSL owner type. + return false; + const auto &TAs = CTSD->getTemplateArgs(); + return TAs.size() > 0 && TAs[0].getKind() == TemplateArgument::Type && + isPointerLikeType(TAs[0].getAsType()); + } + return false; +} +static bool isContainerOfOwner(const RecordDecl *Container) { + const auto *CTSD = + dyn_cast_if_present(Container); + if (!CTSD) + return false; + if (!CTSD->hasAttr()) // Container must be a GSL owner type. + return false; + const auto &TAs = CTSD->getTemplateArgs(); + return TAs.size() > 0 && TAs[0].getKind() == TemplateArgument::Type && + isRecordWithAttr(TAs[0].getAsType()); +} + +// Returns true if the given Record is `std::initializer_list`. +static bool isStdInitializerListOfPointer(const RecordDecl *RD) { + if (const auto *CTSD = + dyn_cast_if_present(RD)) { + const auto &TAs = CTSD->getTemplateArgs(); + return isInStlNamespace(RD) && RD->getIdentifier() && + RD->getName() == "initializer_list" && TAs.size() > 0 && + TAs[0].getKind() == TemplateArgument::Type && + isPointerLikeType(TAs[0].getAsType()); + } + return false; +} + static bool shouldTrackImplicitObjectArg(const CXXMethodDecl *Callee) { if (auto *Conv = dyn_cast_or_null(Callee)) if (isRecordWithAttr(Conv->getConversionType()) && @@ -282,8 +325,7 @@ static bool shouldTrackImplicitObjectArg(const CXXMethodDecl *Callee) { Callee->getFunctionObjectParameterType()) && !isRecordWithAttr(Callee->getFunctionObjectParameterType())) return false; - if (Callee->getReturnType()->isPointerType() || - isRecordWithAttr(Callee->getReturnType())) { + if (isPointerLikeType(Callee->getReturnType())) { if (!Callee->getIdentifier()) return false; return llvm::StringSwitch(Callee->getName()) @@ -331,6 +373,103 @@ static bool shouldTrackFirstArgument(const FunctionDecl *FD) { return false; } +// Returns true if the given constructor is a copy-like constructor, such as +// `Ctor(Owner&&)` or `Ctor(const Owner&)`. +static bool isCopyLikeConstructor(const CXXConstructorDecl *Ctor) { + if (!Ctor || Ctor->param_size() != 1) + return false; + const auto *ParamRefType = + Ctor->getParamDecl(0)->getType()->getAs(); + if (!ParamRefType) + return false; + + // Check if the first parameter type is "Owner". 
+ if (const auto *TST = + ParamRefType->getPointeeType()->getAs()) + return TST->getTemplateName() + .getAsTemplateDecl() + ->getTemplatedDecl() + ->hasAttr(); + return false; +} + +// Returns true if we should perform the GSL analysis on the first argument for +// the given constructor. +static bool +shouldTrackFirstArgumentForConstructor(const CXXConstructExpr *Ctor) { + const auto *LHSRecordDecl = Ctor->getConstructor()->getParent(); + + // Case 1, construct a GSL pointer, e.g. std::string_view + // Always inspect when LHS is a pointer. + if (LHSRecordDecl->hasAttr()) + return true; + + if (Ctor->getConstructor()->getNumParams() != 1 || + !isContainerOfPointer(LHSRecordDecl)) + return false; + + // Now, the LHS is an Owner type, e.g., std::vector. + // + // At a high level, we cannot precisely determine what the nested pointer + // owns. However, by analyzing the RHS owner type, we can use heuristics to + // infer ownership information. These heuristics are designed to be + // conservative, minimizing false positives while still providing meaningful + // diagnostics. + // + // While this inference isn't perfect, it helps catch common use-after-free + // patterns. + auto RHSArgType = Ctor->getArg(0)->getType(); + const auto *RHSRD = RHSArgType->getAsRecordDecl(); + // LHS is constructed from an intializer_list. + // + // std::initializer_list is a proxy object that provides access to the backing + // array. We perform analysis on it to determine if there are any dangling + // temporaries in the backing array. + // E.g. std::vector abc = {string()}; + if (isStdInitializerListOfPointer(RHSRD)) + return true; + + // RHS must be an owner. + if (!isRecordWithAttr(RHSArgType)) + return false; + + // Bail out if the RHS is Owner. + // + // We cannot reliably determine what the LHS nested pointer owns -- it could + // be the entire RHS or the nested pointer in RHS. To avoid false positives, + // we skip this case, such as: + // std::stack s(std::deque{}); + // + // TODO: this also has a false negative, it doesn't catch the case like: + // std::optional> os = std::vector{} + if (isContainerOfPointer(RHSRD)) + return false; + + // Assume that the nested Pointer is constructed from the nested Owner. + // E.g. std::optional sv = std::optional(s); + if (isContainerOfOwner(RHSRD)) + return true; + + // Now, the LHS is an Owner and the RHS is an Owner, where X is + // neither an `Owner` nor a `Pointer`. + // + // Use the constructor's signature as a hint. If it is a copy-like constructor + // `Owner1(Owner2&&)`, we assume that the nested pointer is + // constructed from X. In such cases, we do not diagnose, as `X` is not an + // owner, e.g. + // std::optional sv = std::optional(); + if (const auto *PrimaryCtorTemplate = + Ctor->getConstructor()->getPrimaryTemplate(); + PrimaryCtorTemplate && + isCopyLikeConstructor(dyn_cast_if_present( + PrimaryCtorTemplate->getTemplatedDecl()))) { + return false; + } + // Assume that the nested pointer is constructed from the whole RHS. + // E.g. optional s = std::string(); + return true; +} + // Return true if this is an "normal" assignment operator. 
// We assuments that a normal assingment operator always returns *this, that is, // an lvalue reference that is the same type as the implicit object parameter @@ -473,12 +612,12 @@ static void visitFunctionCallArguments(IndirectLocalPath &Path, Expr *Call, if (CheckCoroCall || Callee->getParamDecl(I)->hasAttr()) VisitLifetimeBoundArg(Callee->getParamDecl(I), Args[I]); else if (EnableGSLAnalysis && I == 0) { + // Perform GSL analysis for the first argument if (shouldTrackFirstArgument(Callee)) { VisitGSLPointerArg(Callee, Args[0]); - } else if (auto *CCE = dyn_cast(Call); - CCE && - CCE->getConstructor()->getParent()->hasAttr()) { - VisitGSLPointerArg(CCE->getConstructor(), Args[0]); + } else if (auto *Ctor = dyn_cast(Call); + Ctor && shouldTrackFirstArgumentForConstructor(Ctor)) { + VisitGSLPointerArg(Ctor->getConstructor(), Args[0]); } } } diff --git a/clang/test/Sema/warn-lifetime-analysis-nocfg.cpp b/clang/test/Sema/warn-lifetime-analysis-nocfg.cpp index 69e5395a78a57..c6272a775a28f 100644 --- a/clang/test/Sema/warn-lifetime-analysis-nocfg.cpp +++ b/clang/test/Sema/warn-lifetime-analysis-nocfg.cpp @@ -158,17 +158,30 @@ auto begin(C &c) -> decltype(c.begin()); template T *begin(T (&array)[N]); +using size_t = decltype(sizeof(0)); + +template +struct initializer_list { + const T* ptr; size_t sz; +}; template struct vector { typedef __gnu_cxx::basic_iterator iterator; iterator begin(); iterator end(); const T *data() const; + vector(); + vector(initializer_list __l); + + template + vector(InputIterator first, InputIterator __last); + T &at(int n); }; template struct basic_string_view { + basic_string_view(); basic_string_view(const T *); const T *begin() const; }; @@ -203,11 +216,21 @@ template struct optional { optional(); optional(const T&); + + template + optional(U&& t); + + template + optional(optional&& __t); + T &operator*() &; T &&operator*() &&; T &value() &; T &&value() &&; }; +template +optional<__decay(T)> make_optional(T&&); + template struct stack { @@ -587,3 +610,170 @@ std::string_view test2() { return k.value(); // expected-warning {{address of stack memory associated}} } } // namespace GH108272 + +namespace GH100526 { +void test() { + std::vector v1({std::string()}); // expected-warning {{object backing the pointer will be destroyed at the end}} + std::vector v2({ + std::string(), // expected-warning {{object backing the pointer will be destroyed at the end}} + std::string_view() + }); + std::vector v3({ + std::string_view(), + std::string() // expected-warning {{object backing the pointer will be destroyed at the end}} + }); + + std::optional o1 = std::string(); // expected-warning {{object backing the pointer}} + + std::string s; + // This is a tricky use-after-free case, what it does: + // 1. make_optional creates a temporary "optional"" object + // 2. the temporary object owns the underlying string which is copied from s. + // 3. the t3 object holds the view to the underlying string of the temporary object. + std::optional o2 = std::make_optional(s); // expected-warning {{object backing the pointer}} + std::optional o3 = std::optional(s); // expected-warning {{object backing the pointer}} + std::optional o4 = std::optional(s); + + // FIXME: should work for assignment cases + v1 = {std::string()}; + o1 = std::string(); + + // no warning on copying pointers. 
+ std::vector n1 = {std::string_view()}; + std::optional n2 = {std::string_view()}; + std::optional n3 = std::string_view(); + std::optional n4 = std::make_optional(std::string_view()); + const char* b = ""; + std::optional n5 = std::make_optional(b); + std::optional n6 = std::make_optional("test"); +} + +std::vector test2(int i) { + std::vector t; + if (i) + return t; // this is fine, no dangling + return std::vector(t.begin(), t.end()); +} + +class Foo { + public: + operator std::string_view() const { return ""; } +}; +class [[gsl::Owner]] FooOwner { + public: + operator std::string_view() const { return ""; } +}; +std::optional GetFoo(); +std::optional GetFooOwner(); + +template +struct [[gsl::Owner]] Container1 { + Container1(); +}; +template +struct [[gsl::Owner]] Container2 { + template + Container2(const Container1& C2); +}; + +std::optional test3(int i) { + std::string s; + std::string_view sv; + if (i) + return s; // expected-warning {{address of stack memory associated}} + return sv; // fine + Container2 c1 = Container1(); // no diagnostic as Foo is not an Owner. + Container2 c2 = Container1(); // expected-warning {{object backing the pointer will be destroyed}} + return GetFoo(); // fine, we don't know Foo is owner or not, be conservative. + return GetFooOwner(); // expected-warning {{returning address of local temporary object}} +} + +std::optional test4(int a) { + return std::make_optional(nullptr); // fine +} + + +template +struct [[gsl::Owner]] StatusOr { + const T &valueLB() const [[clang::lifetimebound]]; + const T &valueNoLB() const; +}; + +template +struct [[gsl::Pointer]] Span { + Span(const std::vector &V); + + const int& getFieldLB() const [[clang::lifetimebound]]; + const int& getFieldNoLB() const; +}; + + +/////// From Owner /////// + +// Pointer from Owner +std::string_view test5() { + std::string_view a = StatusOr().valueLB(); // expected-warning {{object backing the pointer will be dest}} +return StatusOr().valueLB(); // expected-warning {{returning address of local temporary}} + + // No dangling diagnostics on non-lifetimebound methods. + std::string_view b = StatusOr().valueNoLB(); + return StatusOr().valueNoLB(); +} + +// Pointer from Owner +// Prevent regression GH108463 +Span test6(std::vector v) { + Span dangling = std::vector(); // expected-warning {{object backing the pointer}} + return v; // expected-warning {{address of stack memory}} +} + +/////// From Owner> /////// + +// Pointer from Owner> +int* test7(StatusOr> aa) { + // No dangling diagnostic on pointer. + return aa.valueLB().valueLB(); // OK. +} + +// Owner from Owner> +std::vector test8(StatusOr> aa) { + return aa.valueLB(); // OK, no pointer being construct on this case. + return aa.valueNoLB(); +} + +// Pointer from Owner> +Span test9(StatusOr> aa) { + return aa.valueLB(); // expected-warning {{address of stack memory associated}} + return aa.valueNoLB(); // OK. +} + +/////// From Owner /////// + +// Pointer> from Owner +Span test10(StatusOr> aa) { + return aa.valueLB(); // expected-warning {{address of stack memory}} + return aa.valueNoLB(); // OK. +} + +/////// From Owner> /////// + +// Pointer> from Owner> +Span test11(StatusOr> aa) { + return aa.valueLB(); // expected-warning {{address of stack memory}} + return aa.valueNoLB(); // OK. +} + +// Lifetimebound and gsl::Pointer. +const int& test12(Span a) { + return a.getFieldLB(); // expected-warning {{reference to stack memory associated}} + return a.getFieldNoLB(); // OK. 
+} + +void test13() { + // FIXME: RHS is Owner, we skip this case to avoid false positives. + std::optional> abc = std::vector{}; + + std::optional> t = std::vector {}; // expected-warning {{object backing the pointer will be destroyed}} +} + +} // namespace GH100526 From 59693ea6d1822d8cf43db8090ddb4c8d7a78f471 Mon Sep 17 00:00:00 2001 From: Yingwei Zheng Date: Wed, 25 Sep 2024 20:20:03 +0800 Subject: [PATCH 155/212] [ConstantFPRange] Remove `ConstantFPRange::toKnownFPClass` (#109960) Addresses comment https://github.com/llvm/llvm-project/pull/86483#pullrequestreview-2327710679. --- llvm/include/llvm/IR/ConstantFPRange.h | 3 --- llvm/lib/IR/ConstantFPRange.cpp | 8 -------- llvm/unittests/IR/ConstantFPRangeTest.cpp | 8 -------- 3 files changed, 19 deletions(-) diff --git a/llvm/include/llvm/IR/ConstantFPRange.h b/llvm/include/llvm/IR/ConstantFPRange.h index 23f0e8b8e0d13..67f9f945d748b 100644 --- a/llvm/include/llvm/IR/ConstantFPRange.h +++ b/llvm/include/llvm/IR/ConstantFPRange.h @@ -175,9 +175,6 @@ class [[nodiscard]] ConstantFPRange { /// Return the FPClassTest which will return true for the value. FPClassTest classify() const; - /// Return known floating-point classes for values in this range. - KnownFPClass toKnownFPClass() const; - /// Print out the bounds to a stream. void print(raw_ostream &OS) const; diff --git a/llvm/lib/IR/ConstantFPRange.cpp b/llvm/lib/IR/ConstantFPRange.cpp index 58aab353b4393..957701891c8f3 100644 --- a/llvm/lib/IR/ConstantFPRange.cpp +++ b/llvm/lib/IR/ConstantFPRange.cpp @@ -8,7 +8,6 @@ #include "llvm/IR/ConstantFPRange.h" #include "llvm/ADT/APFloat.h" -#include "llvm/Analysis/ValueTracking.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include @@ -196,13 +195,6 @@ FPClassTest ConstantFPRange::classify() const { return static_cast(Mask); } -KnownFPClass ConstantFPRange::toKnownFPClass() const { - KnownFPClass Result; - Result.KnownFPClasses = classify(); - Result.SignBit = getSignBit(); - return Result; -} - void ConstantFPRange::print(raw_ostream &OS) const { if (isFullSet()) OS << "full-set"; diff --git a/llvm/unittests/IR/ConstantFPRangeTest.cpp b/llvm/unittests/IR/ConstantFPRangeTest.cpp index bf6ea95c00e22..722e6566730da 100644 --- a/llvm/unittests/IR/ConstantFPRangeTest.cpp +++ b/llvm/unittests/IR/ConstantFPRangeTest.cpp @@ -7,13 +7,8 @@ //===----------------------------------------------------------------------===// #include "llvm/IR/ConstantFPRange.h" -#include "llvm/ADT/BitVector.h" -#include "llvm/ADT/Sequence.h" -#include "llvm/ADT/SmallBitVector.h" -#include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Operator.h" -#include "llvm/Support/KnownBits.h" #include "gtest/gtest.h" using namespace llvm; @@ -363,14 +358,11 @@ TEST_F(ConstantFPRangeTest, FPClassify) { EXPECT_EQ(SomeNeg.classify(), fcNegFinite); EXPECT_EQ(PosInf.classify(), fcPosInf); EXPECT_EQ(NegInf.classify(), fcNegInf); - EXPECT_TRUE(SomePos.toKnownFPClass().cannotBeOrderedLessThanZero()); EXPECT_EQ(Finite.getSignBit(), std::nullopt); EXPECT_EQ(PosZero.getSignBit(), false); EXPECT_EQ(NegZero.getSignBit(), true); EXPECT_EQ(SomePos.getSignBit(), false); EXPECT_EQ(SomeNeg.getSignBit(), true); - EXPECT_EQ(SomePos.toKnownFPClass().SignBit, false); - EXPECT_EQ(SomeNeg.toKnownFPClass().SignBit, true); EnumerateConstantFPRanges( [](const ConstantFPRange &CR) { From 1e67e4bbba2a90ecaf5340acef110972413e3e5b Mon Sep 17 00:00:00 2001 From: Abhina Sree Date: Wed, 25 Sep 2024 08:21:29 -0400 Subject: [PATCH 156/212] [SystemZ][z/OS] 
 z/OS does not support nanosleep, use usleep instead (#109823)

Use usleep instead of nanosleep to resolve a build error on z/OS, because
there is no support for nanosleep.
---
 llvm/unittests/Support/TimerTest.cpp | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/llvm/unittests/Support/TimerTest.cpp b/llvm/unittests/Support/TimerTest.cpp
index 09545eb6939ae..5686b394e16cd 100644
--- a/llvm/unittests/Support/TimerTest.cpp
+++ b/llvm/unittests/Support/TimerTest.cpp
@@ -27,8 +27,13 @@ void SleepMS() {
   struct timespec Interval;
   Interval.tv_sec = 0;
   Interval.tv_nsec = 1000000;
+#if defined(__MVS__)
+  long Microseconds = (Interval.tv_nsec + 999) / 1000;
+  usleep(Microseconds);
+#else
   nanosleep(&Interval, nullptr);
 #endif
+#endif
 }
 
 TEST(Timer, Additivity) {

From 5ef02a3fd4758ae1b9151ac581eebd1109b4daad Mon Sep 17 00:00:00 2001
From: Nikita Popov
Date: Wed, 25 Sep 2024 14:21:07 +0200
Subject: [PATCH 157/212] [InstCombine] Fall through to computeKnownBits() for sdiv by -1

When dividing by -1 we were breaking out of the code entirely, while we
should fall through to computeKnownBits(). This fixes an
instcombine-verify-known-bits discrepancy.

Fixes https://github.com/llvm/llvm-project/issues/109957.
---
 .../InstCombine/InstCombineSimplifyDemanded.cpp | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
index 9c4d206692fac..c66db9285c799 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
@@ -858,11 +858,9 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Instruction *I,
   }
   case Instruction::SRem: {
     const APInt *Rem;
-    if (match(I->getOperand(1), m_APInt(Rem))) {
-      // X % -1 demands all the bits because we don't want to introduce
-      // INT_MIN % -1 (== undef) by accident.
-      if (Rem->isAllOnes())
-        break;
+    // X % -1 demands all the bits because we don't want to introduce
+    // INT_MIN % -1 (== undef) by accident.
+    if (match(I->getOperand(1), m_APInt(Rem)) && !Rem->isAllOnes()) {
       APInt RA = Rem->abs();
       if (RA.isPowerOf2()) {
         if (DemandedMask.ult(RA)) // srem won't affect demanded bits

From 60ed2361c0917b4f8d54cb85935cfbf8904aa51d Mon Sep 17 00:00:00 2001
From: Alexey Bataev
Date: Wed, 25 Sep 2024 08:58:29 -0400
Subject: [PATCH 158/212] [LV][EVL] Explicitly model AVL as sub, original TC, EVL_PHI.

The patch explicitly models AVL as `sub original TC, EVL_PHI` instead of
computing it inside the EXPLICIT-VECTOR-LENGTH VPInstruction. This is
required for correct support of safe dependence distances.
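As a rough scalar model of the loop structure this produces (a sketch under
assumptions, not actual VPlan output; `VF` and the masked-work comment are
illustrative placeholders):

#include <algorithm>
#include <cstdint>

// The AVL is now an explicit subtraction feeding EXPLICIT-VECTOR-LENGTH.
void evlLoopModel(uint64_t TripCount, uint64_t VF) {
  for (uint64_t EVLPhi = 0; EVLPhi < TripCount;) {
    uint64_t AVL = TripCount - EVLPhi; // EMIT vp<%avl> = sub TC, EVL_PHI
    uint64_t EVL = std::min(AVL, VF);  // EMIT EXPLICIT-VECTOR-LENGTH vp<%avl>
    // ... masked vector work on lanes [EVLPhi, EVLPhi + EVL) ...
    EVLPhi += EVL;                     // next value of the EVL-based IV
  }
}

Once the AVL is a first-class VPValue, a later change can presumably clamp it
(e.g. to a maximum safe dependence distance) before the EVL is computed.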
Reviewers: fhahn, ayalz Reviewed By: ayalz Pull Request: https://github.com/llvm/llvm-project/pull/108869 --- .../lib/Transforms/Vectorize/VPlanRecipes.cpp | 30 +++++++------------ .../Transforms/Vectorize/VPlanTransforms.cpp | 15 +++++++--- .../RISCV/vplan-vp-intrinsics-reduction.ll | 3 +- .../RISCV/vplan-vp-intrinsics.ll | 3 +- 4 files changed, 26 insertions(+), 25 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index f33293e65010f..3f5b73d2d43c3 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -477,27 +477,19 @@ Value *VPInstruction::generate(VPTransformState &State) { return Builder.CreateSelect(Cmp, Sub, Zero); } case VPInstruction::ExplicitVectorLength: { - // Compute EVL - auto GetEVL = [=](VPTransformState &State, Value *AVL) { - assert(AVL->getType()->isIntegerTy() && - "Requested vector length should be an integer."); - - // TODO: Add support for MaxSafeDist for correct loop emission. - assert(State.VF.isScalable() && "Expected scalable vector factor."); - Value *VFArg = State.Builder.getInt32(State.VF.getKnownMinValue()); - - Value *EVL = State.Builder.CreateIntrinsic( - State.Builder.getInt32Ty(), Intrinsic::experimental_get_vector_length, - {AVL, VFArg, State.Builder.getTrue()}); - return EVL; - }; // TODO: Restructure this code with an explicit remainder loop, vsetvli can // be outside of the main loop. - // Compute VTC - IV as the AVL (requested vector length). - Value *Index = State.get(getOperand(0), VPIteration(0, 0)); - Value *TripCount = State.get(getOperand(1), VPIteration(0, 0)); - Value *AVL = State.Builder.CreateSub(TripCount, Index); - Value *EVL = GetEVL(State, AVL); + Value *AVL = State.get(getOperand(0), VPIteration(0, 0)); + // Compute EVL + assert(AVL->getType()->isIntegerTy() && + "Requested vector length should be an integer."); + + assert(State.VF.isScalable() && "Expected scalable vector factor."); + Value *VFArg = State.Builder.getInt32(State.VF.getKnownMinValue()); + + Value *EVL = State.Builder.CreateIntrinsic( + State.Builder.getInt32Ty(), Intrinsic::experimental_get_vector_length, + {AVL, VFArg, State.Builder.getTrue()}); return EVL; } case VPInstruction::CanonicalIVIncrementForPart: { diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index 3b37a1ec9560e..6872cc535a10b 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -1423,7 +1423,8 @@ static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL) { /// ... /// %EVLPhi = EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI [ %StartV, %vector.ph ], /// [ %NextEVLIV, %vector.body ] -/// %VPEVL = EXPLICIT-VECTOR-LENGTH %EVLPhi, original TC +/// %AVL = sub original TC, %EVLPhi +/// %VPEVL = EXPLICIT-VECTOR-LENGTH %AVL /// ... /// %NextEVLIV = add IVSize (cast i32 %VPEVVL to IVSize), %EVLPhi /// ... @@ -1453,9 +1454,15 @@ bool VPlanTransforms::tryAddExplicitVectorLength(VPlan &Plan) { // Create the ExplicitVectorLengthPhi recipe in the main loop. auto *EVLPhi = new VPEVLBasedIVPHIRecipe(StartV, DebugLoc()); EVLPhi->insertAfter(CanonicalIVPHI); - auto *VPEVL = new VPInstruction(VPInstruction::ExplicitVectorLength, - {EVLPhi, Plan.getTripCount()}); - VPEVL->insertBefore(*Header, Header->getFirstNonPhi()); + // TODO: Add support for MaxSafeDist for correct loop emission. + // Compute original TC - IV as the AVL (application vector length). 
+ auto *AVL = new VPInstruction( + Instruction::Sub, {Plan.getTripCount(), EVLPhi}, + DebugLoc(), "avl"); + AVL->insertBefore(*Header, Header->getFirstNonPhi()); + auto *VPEVL = + new VPInstruction(VPInstruction::ExplicitVectorLength, AVL, DebugLoc()); + VPEVL->insertAfter(AVL); auto *CanonicalIVIncrement = cast(CanonicalIVPHI->getBackedgeValue()); diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-reduction.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-reduction.ll index 11405a1c91158..90c209cf3f518 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-reduction.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-reduction.ll @@ -39,7 +39,8 @@ define i32 @reduction(ptr %a, i64 %n, i32 %start) { ; IF-EVL-INLOOP-NEXT: EMIT vp<[[IV:%[0-9]+]]> = CANONICAL-INDUCTION ; IF-EVL-INLOOP-NEXT: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI vp<[[EVL_PHI:%[0-9]+]]> = phi ir<0>, vp<[[IV_NEXT:%[0-9]+]]> ; IF-EVL-INLOOP-NEXT: WIDEN-REDUCTION-PHI ir<[[RDX_PHI:%.+]]> = phi ir<%start>, ir<[[RDX_NEXT:%.+]]> -; IF-EVL-INLOOP-NEXT: EMIT vp<[[EVL:%.+]]> = EXPLICIT-VECTOR-LENGTH vp<[[EVL_PHI]]>, ir<%n> +; IF-EVL-INLOOP-NEXT: EMIT vp<[[AVL:%.+]]> = sub ir<%n>, vp<[[EVL_PHI]]> +; IF-EVL-INLOOP-NEXT: EMIT vp<[[EVL:%.+]]> = EXPLICIT-VECTOR-LENGTH vp<[[AVL]]> ; IF-EVL-INLOOP-NEXT: vp<[[ST:%[0-9]+]]> = SCALAR-STEPS vp<[[EVL_PHI]]>, ir<1> ; IF-EVL-INLOOP-NEXT: CLONE ir<[[GEP1:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]> ; IF-EVL-INLOOP-NEXT: vp<[[PTR1:%[0-9]+]]> = vector-pointer ir<[[GEP1]]> diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics.ll index 6dfe5b608199b..c14a8bce8f48d 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics.ll @@ -23,7 +23,8 @@ define void @foo(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %N) { ; IF-EVL-NEXT: vector.body: ; IF-EVL-NEXT: EMIT vp<[[IV:%[0-9]+]]> = CANONICAL-INDUCTION ; IF-EVL-NEXT: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI vp<[[EVL_PHI:%[0-9]+]]> = phi ir<0>, vp<[[IV_NEXT:%[0-9]+]]> -; IF-EVL-NEXT: EMIT vp<[[EVL:%.+]]> = EXPLICIT-VECTOR-LENGTH vp<[[EVL_PHI]]>, ir<%N> +; IF-EVL-NEXT: EMIT vp<[[AVL:%.+]]> = sub ir<%N>, vp<[[EVL_PHI]]> +; IF-EVL-NEXT: EMIT vp<[[EVL:%.+]]> = EXPLICIT-VECTOR-LENGTH vp<[[AVL]]> ; IF-EVL-NEXT: vp<[[ST:%[0-9]+]]> = SCALAR-STEPS vp<[[EVL_PHI]]>, ir<1> ; IF-EVL-NEXT: CLONE ir<[[GEP1:%.+]]> = getelementptr inbounds ir<%b>, vp<[[ST]]> ; IF-EVL-NEXT: vp<[[PTR1:%[0-9]+]]> = vector-pointer ir<[[GEP1]]> From ab0e8d0678f1093b9a8964cc798780b9f48aa35c Mon Sep 17 00:00:00 2001 From: sstipano <146831748+sstipano@users.noreply.github.com> Date: Wed, 25 Sep 2024 15:02:23 +0200 Subject: [PATCH 159/212] [AMDGPU] Fix failing test after #109958 (#109964) --- .../GlobalISel/inst-select-unmerge-values.mir | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-unmerge-values.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-unmerge-values.mir index 837f65d4bdec6..bec5f646b7839 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-unmerge-values.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-unmerge-values.mir @@ -171,9 +171,11 @@ body: | ; GCN-LABEL: name: test_unmerge_values_s_s64_s_s64_s64_s_s192 ; GCN: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[DEF:%[0-9]+]]:sgpr(s192) = G_IMPLICIT_DEF - ; 
GCN-NEXT: [[UV:%[0-9]+]]:sgpr(s64), [[UV1:%[0-9]+]]:sgpr(s64), [[UV2:%[0-9]+]]:sgpr(s64) = G_UNMERGE_VALUES [[DEF]](s192) - ; GCN-NEXT: S_ENDPGM 0, implicit [[UV]](s64), implicit [[UV1]](s64), implicit [[UV2]](s64) + ; GCN-NEXT: [[DEF:%[0-9]+]]:sgpr_192 = IMPLICIT_DEF + ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY [[DEF]].sub0_sub1 + ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY [[DEF]].sub2_sub3 + ; GCN-NEXT: [[COPY2:%[0-9]+]]:sreg_64 = COPY [[DEF]].sub4_sub5 + ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]], implicit [[COPY1]], implicit [[COPY2]] %0:sgpr(s192) = G_IMPLICIT_DEF %1:sgpr(s64), %2:sgpr(s64), %3:sgpr(s64) = G_UNMERGE_VALUES %0 S_ENDPGM 0, implicit %1, implicit %2, implicit %3 @@ -292,11 +294,11 @@ body: | ; GCN-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:sgpr_384(<12 x s32>) = G_CONCAT_VECTORS [[COPY]](<3 x s32>), [[COPY1]](<3 x s32>), [[COPY2]](<3 x s32>), [[COPY3]](<3 x s32>) ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_96(<3 x s32>) = COPY [[CONCAT_VECTORS]].sub0_sub1_sub2(<12 x s32>) ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_96(<3 x s32>) = COPY [[CONCAT_VECTORS]].sub3_sub4_sub5(<12 x s32>) - ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_96(<3 x s32>), [[COPY5:%[0-9]+]]:sgpr_96(<3 x s32>), [[UV:%[0-9]+]]:sgpr_96(<3 x s32>), [[UV1:%[0-9]+]]:sgpr_96(<3 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s32>) - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2 = COPY [[COPY4]](<3 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5_sgpr6 = COPY [[COPY5]](<3 x s32>) - ; GCN-NEXT: $sgpr8_sgpr9_sgpr10 = COPY [[UV]](<3 x s32>) - ; GCN-NEXT: $sgpr12_sgpr13_sgpr14 = COPY [[UV1]](<3 x s32>) + ; GCN-NEXT: [[UV:%[0-9]+]]:sgpr_96(<3 x s32>), [[UV1:%[0-9]+]]:sgpr_96(<3 x s32>), [[UV2:%[0-9]+]]:sgpr_96(<3 x s32>), [[UV3:%[0-9]+]]:sgpr_96(<3 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s32>) + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2 = COPY [[UV]](<3 x s32>) + ; GCN-NEXT: $sgpr4_sgpr5_sgpr6 = COPY [[UV1]](<3 x s32>) + ; GCN-NEXT: $sgpr8_sgpr9_sgpr10 = COPY [[UV2]](<3 x s32>) + ; GCN-NEXT: $sgpr12_sgpr13_sgpr14 = COPY [[UV3]](<3 x s32>) %0:sgpr(<3 x s32>) = COPY $sgpr0_sgpr1_sgpr2 %1:sgpr(<3 x s32>) = COPY $sgpr4_sgpr5_sgpr6 %2:sgpr(<3 x s32>) = COPY $sgpr8_sgpr9_sgpr10 From 8e9011b3b8dc6a4234e5452951ae429f52127db6 Mon Sep 17 00:00:00 2001 From: Alexey Bataev Date: Wed, 25 Sep 2024 06:04:49 -0700 Subject: [PATCH 160/212] [LV][NFC]Fix formatting --- llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index 6872cc535a10b..a878613c4ba48 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -1456,9 +1456,8 @@ bool VPlanTransforms::tryAddExplicitVectorLength(VPlan &Plan) { EVLPhi->insertAfter(CanonicalIVPHI); // TODO: Add support for MaxSafeDist for correct loop emission. // Compute original TC - IV as the AVL (application vector length). 
- auto *AVL = new VPInstruction( - Instruction::Sub, {Plan.getTripCount(), EVLPhi}, - DebugLoc(), "avl"); + auto *AVL = new VPInstruction(Instruction::Sub, {Plan.getTripCount(), EVLPhi}, + DebugLoc(), "avl"); AVL->insertBefore(*Header, Header->getFirstNonPhi()); auto *VPEVL = new VPInstruction(VPInstruction::ExplicitVectorLength, AVL, DebugLoc()); From fd88121a58da87bf0c5f3e4d8434948c28722640 Mon Sep 17 00:00:00 2001 From: Chris Apple Date: Wed, 25 Sep 2024 06:09:30 -0700 Subject: [PATCH 161/212] [rtsan] Link in proper CXX ABI library (#109715) To match other sanitizers --- compiler-rt/lib/rtsan/CMakeLists.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/compiler-rt/lib/rtsan/CMakeLists.txt b/compiler-rt/lib/rtsan/CMakeLists.txt index 0fc3a3f8f4896..d4296f56acd30 100644 --- a/compiler-rt/lib/rtsan/CMakeLists.txt +++ b/compiler-rt/lib/rtsan/CMakeLists.txt @@ -27,7 +27,8 @@ set(RTSAN_CFLAGS set(RTSAN_LINK_FLAGS ${COMPILER_RT_COMMON_LINK_FLAGS}) set(RTSAN_LINK_LIBS ${COMPILER_RT_UNWINDER_LINK_LIBS} - ${COMPILER_RT_CXX_LINK_LIBS}) + ${SANITIZER_CXX_ABI_LIBRARIES} + ${SANITIZER_COMMON_LINK_LIBS}) append_rtti_flag(OFF RTSAN_CFLAGS) From 4be1c19a9fbdff02044cd46b703c842bb7a6afdb Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Wed, 25 Sep 2024 14:13:49 +0100 Subject: [PATCH 162/212] [VPlan] Adjust AnyOf after creating ComputeReductionResult (NFC). Prepares for a follow-up change to use VPInstruction::ResumePhi to create the resume phi for reductions. --- .../Transforms/Vectorize/LoopVectorize.cpp | 70 +++++++++---------- 1 file changed, 35 insertions(+), 35 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 5e4f33c55610f..6298c54c99459 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -9294,41 +9294,6 @@ void LoopVectorizationPlanner::adjustRecipesForReductions( continue; const RecurrenceDescriptor &RdxDesc = PhiR->getRecurrenceDescriptor(); - // Adjust AnyOf reductions; replace the reduction phi for the selected value - // with a boolean reduction phi node to check if the condition is true in - // any iteration. The final value is selected by the final - // ComputeReductionResult. - if (RecurrenceDescriptor::isAnyOfRecurrenceKind( - RdxDesc.getRecurrenceKind())) { - auto *Select = cast(*find_if(PhiR->users(), [](VPUser *U) { - return isa(U) || - (isa(U) && - cast(U)->getUnderlyingInstr()->getOpcode() == - Instruction::Select); - })); - VPValue *Cmp = Select->getOperand(0); - // If the compare is checking the reduction PHI node, adjust it to check - // the start value. - if (VPRecipeBase *CmpR = Cmp->getDefiningRecipe()) { - for (unsigned I = 0; I != CmpR->getNumOperands(); ++I) - if (CmpR->getOperand(I) == PhiR) - CmpR->setOperand(I, PhiR->getStartValue()); - } - VPBuilder::InsertPointGuard Guard(Builder); - Builder.setInsertPoint(Select); - - // If the true value of the select is the reduction phi, the new value is - // selected if the negated condition is true in any iteration. - if (Select->getOperand(1) == PhiR) - Cmp = Builder.createNot(Cmp); - VPValue *Or = Builder.createOr(PhiR, Cmp); - Select->getVPSingleValue()->replaceAllUsesWith(Or); - - // Convert the reduction phi to operate on bools. 
- PhiR->setOperand(0, Plan->getOrAddLiveIn(ConstantInt::getFalse( - OrigLoop->getHeader()->getContext()))); - } - // If tail is folded by masking, introduce selects between the phi // and the live-out instruction of each reduction, at the beginning of the // dedicated latch block. @@ -9401,6 +9366,41 @@ void LoopVectorizationPlanner::adjustRecipesForReductions( return match(&User, m_Binary(m_VPValue(), m_VPValue())); }); + + // Adjust AnyOf reductions; replace the reduction phi for the selected value + // with a boolean reduction phi node to check if the condition is true in + // any iteration. The final value is selected by the final + // ComputeReductionResult. + if (RecurrenceDescriptor::isAnyOfRecurrenceKind( + RdxDesc.getRecurrenceKind())) { + auto *Select = cast(*find_if(PhiR->users(), [](VPUser *U) { + return isa(U) || + (isa(U) && + cast(U)->getUnderlyingInstr()->getOpcode() == + Instruction::Select); + })); + VPValue *Cmp = Select->getOperand(0); + // If the compare is checking the reduction PHI node, adjust it to check + // the start value. + if (VPRecipeBase *CmpR = Cmp->getDefiningRecipe()) { + for (unsigned I = 0; I != CmpR->getNumOperands(); ++I) + if (CmpR->getOperand(I) == PhiR) + CmpR->setOperand(I, PhiR->getStartValue()); + } + VPBuilder::InsertPointGuard Guard(Builder); + Builder.setInsertPoint(Select); + + // If the true value of the select is the reduction phi, the new value is + // selected if the negated condition is true in any iteration. + if (Select->getOperand(1) == PhiR) + Cmp = Builder.createNot(Cmp); + VPValue *Or = Builder.createOr(PhiR, Cmp); + Select->getVPSingleValue()->replaceAllUsesWith(Or); + + // Convert the reduction phi to operate on bools. + PhiR->setOperand(0, Plan->getOrAddLiveIn(ConstantInt::getFalse( + OrigLoop->getHeader()->getContext()))); + } } VPlanTransforms::clearReductionWrapFlags(*Plan); From 3f37c517fbc40531571f8b9f951a8610b4789cd6 Mon Sep 17 00:00:00 2001 From: Jeremy Morse Date: Wed, 25 Sep 2024 14:22:23 +0100 Subject: [PATCH 163/212] [NFC] Switch a number of DenseMaps to SmallDenseMaps for speedup (#109417) If we use SmallDenseMaps instead of DenseMaps at these locations, we get a substantial speedup because there's less spurious malloc traffic. Discovered by instrumenting DenseMap with some accounting code, then selecting sites where we'll get the most bang for our buck. 
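For context, a minimal sketch of the data-structure difference driving the
speedup (the inline-bucket count below is illustrative, not taken from the
patch):

#include "llvm/ADT/DenseMap.h"

// DenseMap places its bucket array on the heap even for a handful of
// entries; SmallDenseMap keeps the first InlineBuckets buckets inside the
// map object itself and only touches malloc once the map outgrows them.
void sketch() {
  llvm::DenseMap<int, int> Heap;            // buckets are heap-allocated
  llvm::SmallDenseMap<int, int, 16> Inline; // 16 inline buckets, no malloc
  for (int I = 0; I < 4; ++I)
    Inline[I] = I * I;                      // stays in inline storage
}

Maps like `VRBaseMap` are created afresh for each scheduled block and usually
stay small, so the inline storage removes most of the spurious allocations.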
--- .../llvm/Analysis/MemoryDependenceAnalysis.h | 2 +- .../include/llvm/Analysis/SparsePropagation.h | 11 ++--- .../lib/Analysis/MemoryDependenceAnalysis.cpp | 4 +- llvm/lib/Analysis/ScalarEvolution.cpp | 4 +- llvm/lib/CodeGen/CalcSpillWeights.cpp | 2 +- llvm/lib/CodeGen/MachineLICM.cpp | 14 +++--- .../lib/CodeGen/SelectionDAG/InstrEmitter.cpp | 48 ++++++++----------- llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h | 46 +++++++++--------- .../CodeGen/SelectionDAG/ScheduleDAGFast.cpp | 2 +- .../SelectionDAG/ScheduleDAGSDNodes.cpp | 12 ++--- .../CodeGen/SelectionDAG/ScheduleDAGSDNodes.h | 3 +- .../Transforms/IPO/CalledValuePropagation.cpp | 35 ++++++++------ llvm/lib/Transforms/Utils/BasicBlockUtils.cpp | 6 ++- .../Transforms/Vectorize/SLPVectorizer.cpp | 15 +++--- 14 files changed, 103 insertions(+), 101 deletions(-) diff --git a/llvm/include/llvm/Analysis/MemoryDependenceAnalysis.h b/llvm/include/llvm/Analysis/MemoryDependenceAnalysis.h index decb33e6af6bc..c31e663498d5f 100644 --- a/llvm/include/llvm/Analysis/MemoryDependenceAnalysis.h +++ b/llvm/include/llvm/Analysis/MemoryDependenceAnalysis.h @@ -492,7 +492,7 @@ class MemoryDependenceResults { const MemoryLocation &Loc, bool isLoad, BasicBlock *BB, SmallVectorImpl &Result, - DenseMap &Visited, + SmallDenseMap &Visited, bool SkipFirstBlock = false, bool IsIncomplete = false); MemDepResult getNonLocalInfoForBlock(Instruction *QueryInst, diff --git a/llvm/include/llvm/Analysis/SparsePropagation.h b/llvm/include/llvm/Analysis/SparsePropagation.h index d5805a7314757..cc79870229873 100644 --- a/llvm/include/llvm/Analysis/SparsePropagation.h +++ b/llvm/include/llvm/Analysis/SparsePropagation.h @@ -87,10 +87,9 @@ template class AbstractLatticeFunction { /// ComputeInstructionState - Compute the LatticeKeys that change as a result /// of executing instruction \p I. Their associated LatticeVals are store in /// \p ChangedValues. - virtual void - ComputeInstructionState(Instruction &I, - DenseMap &ChangedValues, - SparseSolver &SS) = 0; + virtual void ComputeInstructionState( + Instruction &I, SmallDenseMap &ChangedValues, + SparseSolver &SS) = 0; /// PrintLatticeVal - Render the given LatticeVal to the specified stream. virtual void PrintLatticeVal(LatticeVal LV, raw_ostream &OS); @@ -401,7 +400,7 @@ void SparseSolver::visitPHINode(PHINode &PN) { // computed from its incoming values. For example, SSI form stores its sigma // functions as PHINodes with a single incoming value. if (LatticeFunc->IsSpecialCasedPHI(&PN)) { - DenseMap ChangedValues; + SmallDenseMap ChangedValues; LatticeFunc->ComputeInstructionState(PN, ChangedValues, *this); for (auto &ChangedValue : ChangedValues) if (ChangedValue.second != LatticeFunc->getUntrackedVal()) @@ -456,7 +455,7 @@ void SparseSolver::visitInst(Instruction &I) { // Otherwise, ask the transfer function what the result is. If this is // something that we care about, remember it. - DenseMap ChangedValues; + SmallDenseMap ChangedValues; LatticeFunc->ComputeInstructionState(I, ChangedValues, *this); for (auto &ChangedValue : ChangedValues) if (ChangedValue.second != LatticeFunc->getUntrackedVal()) diff --git a/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp b/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp index 79504ca7b73c8..c5fba184cd085 100644 --- a/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp +++ b/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp @@ -888,7 +888,7 @@ void MemoryDependenceResults::getNonLocalPointerDependency( // each block. 
Because of critical edges, we currently bail out if querying // a block with multiple different pointers. This can happen during PHI // translation. - DenseMap Visited; + SmallDenseMap Visited; if (getNonLocalPointerDepFromBB(QueryInst, Address, Loc, isLoad, FromBB, Result, Visited, true)) return; @@ -1038,7 +1038,7 @@ bool MemoryDependenceResults::getNonLocalPointerDepFromBB( Instruction *QueryInst, const PHITransAddr &Pointer, const MemoryLocation &Loc, bool isLoad, BasicBlock *StartBB, SmallVectorImpl &Result, - DenseMap &Visited, bool SkipFirstBlock, + SmallDenseMap &Visited, bool SkipFirstBlock, bool IsIncomplete) { // Look up the cached info for Pointer. ValueIsLoadPair CacheKey(Pointer.getAddr(), isLoad); diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp index 233f8edca5b13..09e5c080c19cf 100644 --- a/llvm/lib/Analysis/ScalarEvolution.cpp +++ b/llvm/lib/Analysis/ScalarEvolution.cpp @@ -2255,7 +2255,7 @@ const SCEV *ScalarEvolution::getAnyExtendExpr(const SCEV *Op, /// the common case where no interesting opportunities are present, and /// is also used as a check to avoid infinite recursion. static bool -CollectAddOperandsWithScales(DenseMap &M, +CollectAddOperandsWithScales(SmallDenseMap &M, SmallVectorImpl &NewOps, APInt &AccumulatedConstant, ArrayRef Ops, const APInt &Scale, @@ -2753,7 +2753,7 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl &Ops, // operands multiplied by constant values. if (Idx < Ops.size() && isa(Ops[Idx])) { uint64_t BitWidth = getTypeSizeInBits(Ty); - DenseMap M; + SmallDenseMap M; SmallVector NewOps; APInt AccumulatedConstant(BitWidth, 0); if (CollectAddOperandsWithScales(M, NewOps, AccumulatedConstant, diff --git a/llvm/lib/CodeGen/CalcSpillWeights.cpp b/llvm/lib/CodeGen/CalcSpillWeights.cpp index 9d8c9119f7719..88ed2291313c9 100644 --- a/llvm/lib/CodeGen/CalcSpillWeights.cpp +++ b/llvm/lib/CodeGen/CalcSpillWeights.cpp @@ -222,7 +222,7 @@ float VirtRegAuxInfo::weightCalcHelper(LiveInterval &LI, SlotIndex *Start, bool IsExiting = false; std::set CopyHints; - DenseMap Hint; + SmallDenseMap Hint; for (MachineRegisterInfo::reg_instr_nodbg_iterator I = MRI.reg_instr_nodbg_begin(LI.reg()), E = MRI.reg_instr_nodbg_end(); diff --git a/llvm/lib/CodeGen/MachineLICM.cpp b/llvm/lib/CodeGen/MachineLICM.cpp index 6768eeeb4364c..3289a692221ba 100644 --- a/llvm/lib/CodeGen/MachineLICM.cpp +++ b/llvm/lib/CodeGen/MachineLICM.cpp @@ -239,7 +239,7 @@ namespace { bool IsCheapInstruction(MachineInstr &MI) const; - bool CanCauseHighRegPressure(const DenseMap &Cost, + bool CanCauseHighRegPressure(const SmallDenseMap &Cost, bool Cheap); void UpdateBackTraceRegPressure(const MachineInstr *MI); @@ -264,9 +264,9 @@ namespace { void InitRegPressure(MachineBasicBlock *BB); - DenseMap calcRegisterCost(const MachineInstr *MI, - bool ConsiderSeen, - bool ConsiderUnseenAsDef); + SmallDenseMap calcRegisterCost(const MachineInstr *MI, + bool ConsiderSeen, + bool ConsiderUnseenAsDef); void UpdateRegPressure(const MachineInstr *MI, bool ConsiderUnseenAsDef = false); @@ -977,10 +977,10 @@ void MachineLICMImpl::UpdateRegPressure(const MachineInstr *MI, /// If 'ConsiderSeen' is true, updates 'RegSeen' and uses the information to /// figure out which usages are live-ins. /// FIXME: Figure out a way to consider 'RegSeen' from all code paths. 
-DenseMap +SmallDenseMap MachineLICMImpl::calcRegisterCost(const MachineInstr *MI, bool ConsiderSeen, bool ConsiderUnseenAsDef) { - DenseMap Cost; + SmallDenseMap Cost; if (MI->isImplicitDef()) return Cost; for (unsigned i = 0, e = MI->getDesc().getNumOperands(); i != e; ++i) { @@ -1248,7 +1248,7 @@ bool MachineLICMImpl::IsCheapInstruction(MachineInstr &MI) const { /// Visit BBs from header to current BB, check if hoisting an instruction of the /// given cost matrix can cause high register pressure. bool MachineLICMImpl::CanCauseHighRegPressure( - const DenseMap &Cost, bool CheapInstr) { + const SmallDenseMap &Cost, bool CheapInstr) { for (const auto &RPIdAndCost : Cost) { if (RPIdAndCost.second <= 0) continue; diff --git a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp index 53ce21906204c..12a48ab06f1c0 100644 --- a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp @@ -82,8 +82,7 @@ static unsigned countOperands(SDNode *Node, unsigned NumExpUses, /// EmitCopyFromReg - Generate machine code for an CopyFromReg node or an /// implicit physical register output. void InstrEmitter::EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, - Register SrcReg, - DenseMap &VRBaseMap) { + Register SrcReg, VRBaseMapType &VRBaseMap) { Register VRBase; if (SrcReg.isVirtual()) { // Just use the input register directly! @@ -187,7 +186,7 @@ void InstrEmitter::CreateVirtualRegisters(SDNode *Node, MachineInstrBuilder &MIB, const MCInstrDesc &II, bool IsClone, bool IsCloned, - DenseMap &VRBaseMap) { + VRBaseMapType &VRBaseMap) { assert(Node->getMachineOpcode() != TargetOpcode::IMPLICIT_DEF && "IMPLICIT_DEF should have been handled as a special case elsewhere!"); @@ -265,8 +264,7 @@ void InstrEmitter::CreateVirtualRegisters(SDNode *Node, /// getVR - Return the virtual register corresponding to the specified result /// of the specified node. -Register InstrEmitter::getVR(SDValue Op, - DenseMap &VRBaseMap) { +Register InstrEmitter::getVR(SDValue Op, VRBaseMapType &VRBaseMap) { if (Op.isMachineOpcode() && Op.getMachineOpcode() == TargetOpcode::IMPLICIT_DEF) { // Add an IMPLICIT_DEF instruction before every use. @@ -280,7 +278,7 @@ Register InstrEmitter::getVR(SDValue Op, return VReg; } - DenseMap::iterator I = VRBaseMap.find(Op); + VRBaseMapType::iterator I = VRBaseMap.find(Op); assert(I != VRBaseMap.end() && "Node emitted out of order - late"); return I->second; } @@ -318,7 +316,7 @@ InstrEmitter::AddRegisterOperand(MachineInstrBuilder &MIB, SDValue Op, unsigned IIOpNum, const MCInstrDesc *II, - DenseMap &VRBaseMap, + VRBaseMapType &VRBaseMap, bool IsDebug, bool IsClone, bool IsCloned) { assert(Op.getValueType() != MVT::Other && Op.getValueType() != MVT::Glue && @@ -395,12 +393,10 @@ InstrEmitter::AddRegisterOperand(MachineInstrBuilder &MIB, /// AddOperand - Add the specified operand to the specified machine instr. II /// specifies the instruction information for the node, and IIOpNum is the /// operand number (in the II) that we are adding. 
-void InstrEmitter::AddOperand(MachineInstrBuilder &MIB, - SDValue Op, - unsigned IIOpNum, - const MCInstrDesc *II, - DenseMap &VRBaseMap, - bool IsDebug, bool IsClone, bool IsCloned) { +void InstrEmitter::AddOperand(MachineInstrBuilder &MIB, SDValue Op, + unsigned IIOpNum, const MCInstrDesc *II, + VRBaseMapType &VRBaseMap, bool IsDebug, + bool IsClone, bool IsCloned) { if (Op.isMachineOpcode()) { AddRegisterOperand(MIB, Op, IIOpNum, II, VRBaseMap, IsDebug, IsClone, IsCloned); @@ -499,8 +495,7 @@ Register InstrEmitter::ConstrainForSubReg(Register VReg, unsigned SubIdx, /// EmitSubregNode - Generate machine code for subreg nodes. /// -void InstrEmitter::EmitSubregNode(SDNode *Node, - DenseMap &VRBaseMap, +void InstrEmitter::EmitSubregNode(SDNode *Node, VRBaseMapType &VRBaseMap, bool IsClone, bool IsCloned) { Register VRBase; unsigned Opc = Node->getMachineOpcode(); @@ -634,7 +629,7 @@ void InstrEmitter::EmitSubregNode(SDNode *Node, /// void InstrEmitter::EmitCopyToRegClassNode(SDNode *Node, - DenseMap &VRBaseMap) { + VRBaseMapType &VRBaseMap) { Register VReg = getVR(Node->getOperand(0), VRBaseMap); // Create the new VReg in the destination class and emit a copy. @@ -653,9 +648,8 @@ InstrEmitter::EmitCopyToRegClassNode(SDNode *Node, /// EmitRegSequence - Generate machine code for REG_SEQUENCE nodes. /// -void InstrEmitter::EmitRegSequence(SDNode *Node, - DenseMap &VRBaseMap, - bool IsClone, bool IsCloned) { +void InstrEmitter::EmitRegSequence(SDNode *Node, VRBaseMapType &VRBaseMap, + bool IsClone, bool IsCloned) { unsigned DstRCIdx = Node->getConstantOperandVal(0); const TargetRegisterClass *RC = TRI->getRegClass(DstRCIdx); Register NewVReg = MRI->createVirtualRegister(TRI->getAllocatableClass(RC)); @@ -703,7 +697,7 @@ void InstrEmitter::EmitRegSequence(SDNode *Node, /// MachineInstr * InstrEmitter::EmitDbgValue(SDDbgValue *SD, - DenseMap &VRBaseMap) { + VRBaseMapType &VRBaseMap) { DebugLoc DL = SD->getDebugLoc(); assert(cast(SD->getVariable()) ->isValidLocationForIntrinsic(DL) && @@ -755,7 +749,7 @@ MachineOperand GetMOForConstDbgOp(const SDDbgOperand &Op) { void InstrEmitter::AddDbgValueLocationOps( MachineInstrBuilder &MIB, const MCInstrDesc &DbgValDesc, ArrayRef LocationOps, - DenseMap &VRBaseMap) { + VRBaseMapType &VRBaseMap) { for (const SDDbgOperand &Op : LocationOps) { switch (Op.getKind()) { case SDDbgOperand::FRAMEIX: @@ -786,7 +780,7 @@ void InstrEmitter::AddDbgValueLocationOps( MachineInstr * InstrEmitter::EmitDbgInstrRef(SDDbgValue *SD, - DenseMap &VRBaseMap) { + VRBaseMapType &VRBaseMap) { MDNode *Var = SD->getVariable(); const DIExpression *Expr = (DIExpression *)SD->getExpression(); DebugLoc DL = SD->getDebugLoc(); @@ -862,7 +856,7 @@ InstrEmitter::EmitDbgInstrRef(SDDbgValue *SD, // Look up the corresponding VReg for the given SDNode, if any. SDNode *Node = DbgOperand.getSDNode(); SDValue Op = SDValue(Node, DbgOperand.getResNo()); - DenseMap::iterator I = VRBaseMap.find(Op); + VRBaseMapType::iterator I = VRBaseMap.find(Op); // No VReg -> produce a DBG_VALUE $noreg instead. 
if (I == VRBaseMap.end()) break; @@ -928,7 +922,7 @@ MachineInstr *InstrEmitter::EmitDbgNoLocation(SDDbgValue *SD) { MachineInstr * InstrEmitter::EmitDbgValueList(SDDbgValue *SD, - DenseMap &VRBaseMap) { + VRBaseMapType &VRBaseMap) { MDNode *Var = SD->getVariable(); DIExpression *Expr = SD->getExpression(); DebugLoc DL = SD->getDebugLoc(); @@ -944,7 +938,7 @@ InstrEmitter::EmitDbgValueList(SDDbgValue *SD, MachineInstr * InstrEmitter::EmitDbgValueFromSingleOp(SDDbgValue *SD, - DenseMap &VRBaseMap) { + VRBaseMapType &VRBaseMap) { MDNode *Var = SD->getVariable(); DIExpression *Expr = SD->getExpression(); DebugLoc DL = SD->getDebugLoc(); @@ -996,7 +990,7 @@ InstrEmitter::EmitDbgLabel(SDDbgLabel *SD) { /// void InstrEmitter:: EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned, - DenseMap &VRBaseMap) { + VRBaseMapType &VRBaseMap) { unsigned Opc = Node->getMachineOpcode(); // Handle subreg insert/extract specially @@ -1238,7 +1232,7 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned, /// needed dependencies. void InstrEmitter:: EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned, - DenseMap &VRBaseMap) { + VRBaseMapType &VRBaseMap) { switch (Node->getOpcode()) { default: #ifndef NDEBUG diff --git a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h index 959bce31c8b27..16d754cdc2338 100644 --- a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h +++ b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h @@ -30,6 +30,10 @@ class TargetLowering; class TargetMachine; class LLVM_LIBRARY_VISIBILITY InstrEmitter { +public: + using VRBaseMapType = SmallDenseMap; + +private: MachineFunction *MF; MachineRegisterInfo *MRI; const TargetInstrInfo *TII; @@ -45,18 +49,17 @@ class LLVM_LIBRARY_VISIBILITY InstrEmitter { /// EmitCopyFromReg - Generate machine code for an CopyFromReg node or an /// implicit physical register output. void EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, - Register SrcReg, DenseMap &VRBaseMap); + Register SrcReg, VRBaseMapType &VRBaseMap); void CreateVirtualRegisters(SDNode *Node, MachineInstrBuilder &MIB, const MCInstrDesc &II, bool IsClone, bool IsCloned, - DenseMap &VRBaseMap); + VRBaseMapType &VRBaseMap); /// getVR - Return the virtual register corresponding to the specified result /// of the specified node. - Register getVR(SDValue Op, - DenseMap &VRBaseMap); + Register getVR(SDValue Op, VRBaseMapType &VRBaseMap); /// AddRegisterOperand - Add the specified register as an operand to the /// specified machine instr. Insert register copies if the register is @@ -65,7 +68,7 @@ class LLVM_LIBRARY_VISIBILITY InstrEmitter { SDValue Op, unsigned IIOpNum, const MCInstrDesc *II, - DenseMap &VRBaseMap, + VRBaseMapType &VRBaseMap, bool IsDebug, bool IsClone, bool IsCloned); /// AddOperand - Add the specified operand to the specified machine instr. II @@ -76,7 +79,7 @@ class LLVM_LIBRARY_VISIBILITY InstrEmitter { SDValue Op, unsigned IIOpNum, const MCInstrDesc *II, - DenseMap &VRBaseMap, + VRBaseMapType &VRBaseMap, bool IsDebug, bool IsClone, bool IsCloned); /// ConstrainForSubReg - Try to constrain VReg to a register class that @@ -87,20 +90,20 @@ class LLVM_LIBRARY_VISIBILITY InstrEmitter { /// EmitSubregNode - Generate machine code for subreg nodes. /// - void EmitSubregNode(SDNode *Node, DenseMap &VRBaseMap, - bool IsClone, bool IsCloned); + void EmitSubregNode(SDNode *Node, VRBaseMapType &VRBaseMap, bool IsClone, + bool IsCloned); /// EmitCopyToRegClassNode - Generate machine code for COPY_TO_REGCLASS nodes. 
/// COPY_TO_REGCLASS is just a normal copy, except that the destination /// register is constrained to be in a particular register class. /// - void EmitCopyToRegClassNode(SDNode *Node, - DenseMap &VRBaseMap); + void EmitCopyToRegClassNode(SDNode *Node, VRBaseMapType &VRBaseMap); /// EmitRegSequence - Generate machine code for REG_SEQUENCE nodes. /// - void EmitRegSequence(SDNode *Node, DenseMap &VRBaseMap, - bool IsClone, bool IsCloned); + void EmitRegSequence(SDNode *Node, VRBaseMapType &VRBaseMap, bool IsClone, + bool IsCloned); + public: /// CountResults - The results of target nodes have register or immediate /// operands first, then an optional chain, and optional flag operands @@ -110,29 +113,26 @@ class LLVM_LIBRARY_VISIBILITY InstrEmitter { void AddDbgValueLocationOps(MachineInstrBuilder &MIB, const MCInstrDesc &DbgValDesc, ArrayRef Locations, - DenseMap &VRBaseMap); + VRBaseMapType &VRBaseMap); /// EmitDbgValue - Generate machine instruction for a dbg_value node. /// - MachineInstr *EmitDbgValue(SDDbgValue *SD, - DenseMap &VRBaseMap); + MachineInstr *EmitDbgValue(SDDbgValue *SD, VRBaseMapType &VRBaseMap); /// Emit a dbg_value as a DBG_INSTR_REF. May produce DBG_VALUE $noreg instead /// if there is no variable location; alternately a half-formed DBG_INSTR_REF /// that refers to a virtual register and is corrected later in isel. - MachineInstr *EmitDbgInstrRef(SDDbgValue *SD, - DenseMap &VRBaseMap); + MachineInstr *EmitDbgInstrRef(SDDbgValue *SD, VRBaseMapType &VRBaseMap); /// Emit a DBG_VALUE $noreg, indicating a variable has no location. MachineInstr *EmitDbgNoLocation(SDDbgValue *SD); /// Emit a DBG_VALUE_LIST from the operands to SDDbgValue. - MachineInstr *EmitDbgValueList(SDDbgValue *SD, - DenseMap &VRBaseMap); + MachineInstr *EmitDbgValueList(SDDbgValue *SD, VRBaseMapType &VRBaseMap); /// Emit a DBG_VALUE from the operands to SDDbgValue. MachineInstr *EmitDbgValueFromSingleOp(SDDbgValue *SD, - DenseMap &VRBaseMap); + VRBaseMapType &VRBaseMap); /// Generate machine instruction for a dbg_label node. MachineInstr *EmitDbgLabel(SDDbgLabel *SD); @@ -140,7 +140,7 @@ class LLVM_LIBRARY_VISIBILITY InstrEmitter { /// EmitNode - Generate machine code for a node and needed dependencies. 
/// void EmitNode(SDNode *Node, bool IsClone, bool IsCloned, - DenseMap &VRBaseMap) { + VRBaseMapType &VRBaseMap) { if (Node->isMachineOpcode()) EmitMachineNode(Node, IsClone, IsCloned, VRBaseMap); else @@ -160,9 +160,9 @@ class LLVM_LIBRARY_VISIBILITY InstrEmitter { private: void EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned, - DenseMap &VRBaseMap); + VRBaseMapType &VRBaseMap); void EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned, - DenseMap &VRBaseMap); + VRBaseMapType &VRBaseMap); }; } // namespace llvm diff --git a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp index de4a1ac2a3baf..70a7438440191 100644 --- a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp @@ -770,7 +770,7 @@ void ScheduleDAGLinearize::Schedule() { MachineBasicBlock* ScheduleDAGLinearize::EmitSchedule(MachineBasicBlock::iterator &InsertPos) { InstrEmitter Emitter(DAG->getTarget(), BB, InsertPos); - DenseMap VRBaseMap; + InstrEmitter::VRBaseMapType VRBaseMap; LLVM_DEBUG({ dbgs() << "\n*** Final schedule ***\n"; }); diff --git a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp index 53dd71d173473..31939ae5922ec 100644 --- a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp @@ -737,7 +737,7 @@ void ScheduleDAGSDNodes::VerifyScheduledSequence(bool isBottomUp) { static void ProcessSDDbgValues(SDNode *N, SelectionDAG *DAG, InstrEmitter &Emitter, SmallVectorImpl > &Orders, - DenseMap &VRBaseMap, unsigned Order) { + InstrEmitter::VRBaseMapType &VRBaseMap, unsigned Order) { if (!N->getHasDebugValue()) return; @@ -782,7 +782,7 @@ ProcessSDDbgValues(SDNode *N, SelectionDAG *DAG, InstrEmitter &Emitter, // instructions in the right order. static void ProcessSourceNode(SDNode *N, SelectionDAG *DAG, InstrEmitter &Emitter, - DenseMap &VRBaseMap, + InstrEmitter::VRBaseMapType &VRBaseMap, SmallVectorImpl> &Orders, SmallSet &Seen, MachineInstr *NewInsn) { unsigned Order = N->getIROrder(); @@ -808,7 +808,7 @@ ProcessSourceNode(SDNode *N, SelectionDAG *DAG, InstrEmitter &Emitter, } void ScheduleDAGSDNodes:: -EmitPhysRegCopy(SUnit *SU, DenseMap &VRBaseMap, +EmitPhysRegCopy(SUnit *SU, SmallDenseMap &VRBaseMap, MachineBasicBlock::iterator InsertPos) { for (const SDep &Pred : SU->Preds) { if (Pred.isCtrl()) @@ -851,8 +851,8 @@ EmitPhysRegCopy(SUnit *SU, DenseMap &VRBaseMap, MachineBasicBlock *ScheduleDAGSDNodes:: EmitSchedule(MachineBasicBlock::iterator &InsertPos) { InstrEmitter Emitter(DAG->getTarget(), BB, InsertPos); - DenseMap VRBaseMap; - DenseMap CopyVRBaseMap; + InstrEmitter::VRBaseMapType VRBaseMap; + SmallDenseMap CopyVRBaseMap; SmallVector, 32> Orders; SmallSet Seen; bool HasDbg = DAG->hasDebugValues(); @@ -861,7 +861,7 @@ EmitSchedule(MachineBasicBlock::iterator &InsertPos) { // Zero, one, or multiple instructions can be created when emitting a node. auto EmitNode = [&](SDNode *Node, bool IsClone, bool IsCloned, - DenseMap &VRBaseMap) -> MachineInstr * { + InstrEmitter::VRBaseMapType &VRBaseMap) -> MachineInstr * { // Fetch instruction prior to this, or end() if nonexistant. 
auto GetPrevInsn = [&](MachineBasicBlock::iterator I) { if (I == BB->begin()) diff --git a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h index 446df640821d8..b7d25c6ccc9b0 100644 --- a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h +++ b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h @@ -184,7 +184,8 @@ class InstrItineraryData; void BuildSchedUnits(); void AddSchedEdges(); - void EmitPhysRegCopy(SUnit *SU, DenseMap &VRBaseMap, + void EmitPhysRegCopy(SUnit *SU, + SmallDenseMap &VRBaseMap, MachineBasicBlock::iterator InsertPos); }; diff --git a/llvm/lib/Transforms/IPO/CalledValuePropagation.cpp b/llvm/lib/Transforms/IPO/CalledValuePropagation.cpp index acc10f57c29ac..66ae0706d638c 100644 --- a/llvm/lib/Transforms/IPO/CalledValuePropagation.cpp +++ b/llvm/lib/Transforms/IPO/CalledValuePropagation.cpp @@ -169,7 +169,8 @@ class CVPLatticeFunc /// just a few kinds of instructions since we're only propagating values that /// can be called. void ComputeInstructionState( - Instruction &I, DenseMap &ChangedValues, + Instruction &I, + SmallDenseMap &ChangedValues, SparseSolver &SS) override { switch (I.getOpcode()) { case Instruction::Call: @@ -238,9 +239,10 @@ class CVPLatticeFunc /// Handle return instructions. The function's return state is the merge of /// the returned value state and the function's return state. - void visitReturn(ReturnInst &I, - DenseMap &ChangedValues, - SparseSolver &SS) { + void + visitReturn(ReturnInst &I, + SmallDenseMap &ChangedValues, + SparseSolver &SS) { Function *F = I.getParent()->getParent(); if (F->getReturnType()->isVoidTy()) return; @@ -254,9 +256,10 @@ class CVPLatticeFunc /// the merge of the argument state with the call sites corresponding actual /// argument state. The call site state is the merge of the call site state /// with the returned value state of the called function. - void visitCallBase(CallBase &CB, - DenseMap &ChangedValues, - SparseSolver &SS) { + void + visitCallBase(CallBase &CB, + SmallDenseMap &ChangedValues, + SparseSolver &SS) { Function *F = CB.getCalledFunction(); auto RegI = CVPLatticeKey(&CB, IPOGrouping::Register); @@ -298,9 +301,10 @@ class CVPLatticeFunc /// Handle select instructions. The select instruction state is the merge the /// true and false value states. - void visitSelect(SelectInst &I, - DenseMap &ChangedValues, - SparseSolver &SS) { + void + visitSelect(SelectInst &I, + SmallDenseMap &ChangedValues, + SparseSolver &SS) { auto RegI = CVPLatticeKey(&I, IPOGrouping::Register); auto RegT = CVPLatticeKey(I.getTrueValue(), IPOGrouping::Register); auto RegF = CVPLatticeKey(I.getFalseValue(), IPOGrouping::Register); @@ -312,7 +316,7 @@ class CVPLatticeFunc /// variable, we attempt to track the value. The loaded value state is the /// merge of the loaded value state with the global variable state. void visitLoad(LoadInst &I, - DenseMap &ChangedValues, + SmallDenseMap &ChangedValues, SparseSolver &SS) { auto RegI = CVPLatticeKey(&I, IPOGrouping::Register); if (auto *GV = dyn_cast(I.getPointerOperand())) { @@ -327,9 +331,10 @@ class CVPLatticeFunc /// Handle store instructions. If the pointer operand of the store is a /// global variable, we attempt to track the value. The global variable state /// is the merge of the stored value state with the global variable state. 
- void visitStore(StoreInst &I, - DenseMap &ChangedValues, - SparseSolver &SS) { + void + visitStore(StoreInst &I, + SmallDenseMap &ChangedValues, + SparseSolver &SS) { auto *GV = dyn_cast(I.getPointerOperand()); if (!GV) return; @@ -342,7 +347,7 @@ class CVPLatticeFunc /// Handle all other instructions. All other instructions are marked /// overdefined. void visitInst(Instruction &I, - DenseMap &ChangedValues, + SmallDenseMap &ChangedValues, SparseSolver &SS) { // Simply bail if this instruction has no user. if (I.use_empty()) diff --git a/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp b/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp index 4144c7993b7e4..7bffd4da75a5b 100644 --- a/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp +++ b/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp @@ -503,7 +503,8 @@ static bool removeRedundantDbgInstrsUsingBackwardScan(BasicBlock *BB) { static bool DbgVariableRecordsRemoveRedundantDbgInstrsUsingForwardScan(BasicBlock *BB) { SmallVector ToBeRemoved; - DenseMap, DIExpression *>> + SmallDenseMap, DIExpression *>, 4> VariableMap; for (auto &I : *BB) { for (DbgVariableRecord &DVR : filterDbgVars(I.getDbgRecordRange())) { @@ -584,7 +585,8 @@ static bool removeRedundantDbgInstrsUsingForwardScan(BasicBlock *BB) { return DbgVariableRecordsRemoveRedundantDbgInstrsUsingForwardScan(BB); SmallVector ToBeRemoved; - DenseMap, DIExpression *>> + SmallDenseMap, DIExpression *>, 4> VariableMap; for (auto &I : *BB) { if (DbgValueInst *DVI = dyn_cast(&I)) { diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 414c6388c777b..cc02d8237db63 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -7512,7 +7512,7 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, auto TryToFindDuplicates = [&](const InstructionsState &S, bool DoNotFail = false) { // Check that every instruction appears once in this bundle. - DenseMap UniquePositions(VL.size()); + SmallDenseMap UniquePositions(VL.size()); for (Value *V : VL) { if (isConstant(V)) { ReuseShuffleIndices.emplace_back( @@ -18383,7 +18383,8 @@ class HorizontalReduction { for (Value *V : Candidates) TrackedVals.try_emplace(V, V); - auto At = [](MapVector &MV, Value *V) -> unsigned & { + auto At = [](SmallMapVector &MV, + Value *V) -> unsigned & { auto *It = MV.find(V); assert(It != MV.end() && "Unable to find given key."); return It->second; @@ -18470,7 +18471,7 @@ class HorizontalReduction { RdxKind != RecurKind::FMul && RdxKind != RecurKind::FMulAdd; // Gather same values. - MapVector SameValuesCounter; + SmallMapVector SameValuesCounter; if (IsSupportedHorRdxIdentityOp) for (Value *V : Candidates) { Value *OrigV = TrackedToOrig.at(V); @@ -19089,10 +19090,10 @@ class HorizontalReduction { /// Emits actual operation for the scalar identity values, found during /// horizontal reduction analysis. 
-  Value *emitReusedOps(Value *VectorizedValue, IRBuilderBase &Builder,
-                       BoUpSLP &R,
-                       const MapVector<Value *, unsigned> &SameValuesCounter,
-                       const DenseMap<Value *, Value *> &TrackedToOrig) {
+  Value *
+  emitReusedOps(Value *VectorizedValue, IRBuilderBase &Builder, BoUpSLP &R,
+                const SmallMapVector<Value *, unsigned, 16> &SameValuesCounter,
+                const DenseMap<Value *, Value *> &TrackedToOrig) {
     assert(IsSupportedHorRdxIdentityOp &&
            "The optimization of matched scalar identity horizontal reductions "
            "must be supported.");

From 817e742ba55406688bf1f00557d24a60cfce962f Mon Sep 17 00:00:00 2001
From: Jeremy Morse
Date: Wed, 25 Sep 2024 14:31:30 +0100
Subject: [PATCH 164/212] Revert "[NFC] Switch a number of DenseMaps to SmallDenseMaps for speedup (#109417)"

This reverts commit 3f37c517fbc40531571f8b9f951a8610b4789cd6.

Lo and behold, I missed a unit test.
---
 .../llvm/Analysis/MemoryDependenceAnalysis.h  |  2 +-
 .../include/llvm/Analysis/SparsePropagation.h | 11 +++--
 .../lib/Analysis/MemoryDependenceAnalysis.cpp |  4 +-
 llvm/lib/Analysis/ScalarEvolution.cpp         |  4 +-
 llvm/lib/CodeGen/CalcSpillWeights.cpp         |  2 +-
 llvm/lib/CodeGen/MachineLICM.cpp              | 14 +++---
 .../lib/CodeGen/SelectionDAG/InstrEmitter.cpp | 48 +++++++++++--------
 llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h  | 46 +++++++++---------
 .../CodeGen/SelectionDAG/ScheduleDAGFast.cpp  |  2 +-
 .../SelectionDAG/ScheduleDAGSDNodes.cpp       | 12 ++---
 .../CodeGen/SelectionDAG/ScheduleDAGSDNodes.h |  3 +-
 .../Transforms/IPO/CalledValuePropagation.cpp | 35 ++++++-------
 llvm/lib/Transforms/Utils/BasicBlockUtils.cpp |  6 +--
 .../Transforms/Vectorize/SLPVectorizer.cpp    | 15 +++---
 14 files changed, 101 insertions(+), 103 deletions(-)

diff --git a/llvm/include/llvm/Analysis/MemoryDependenceAnalysis.h b/llvm/include/llvm/Analysis/MemoryDependenceAnalysis.h
index c31e663498d5f..decb33e6af6bc 100644
--- a/llvm/include/llvm/Analysis/MemoryDependenceAnalysis.h
+++ b/llvm/include/llvm/Analysis/MemoryDependenceAnalysis.h
@@ -492,7 +492,7 @@ class MemoryDependenceResults {
                                  const MemoryLocation &Loc, bool isLoad,
                                  BasicBlock *BB,
                                  SmallVectorImpl<NonLocalDepResult> &Result,
-                                 SmallDenseMap<BasicBlock *, Value *, 16> &Visited,
+                                 DenseMap<BasicBlock *, Value *> &Visited,
                                  bool SkipFirstBlock = false,
                                  bool IsIncomplete = false);
   MemDepResult getNonLocalInfoForBlock(Instruction *QueryInst,
diff --git a/llvm/include/llvm/Analysis/SparsePropagation.h b/llvm/include/llvm/Analysis/SparsePropagation.h
index cc79870229873..d5805a7314757 100644
--- a/llvm/include/llvm/Analysis/SparsePropagation.h
+++ b/llvm/include/llvm/Analysis/SparsePropagation.h
@@ -87,9 +87,10 @@ template <class LatticeKey, class LatticeVal, class KeyInfo> class AbstractLatticeFunction {
   /// ComputeInstructionState - Compute the LatticeKeys that change as a result
   /// of executing instruction \p I. Their associated LatticeVals are store in
   /// \p ChangedValues.
-  virtual void ComputeInstructionState(
-      Instruction &I, SmallDenseMap<LatticeKey, LatticeVal, 16> &ChangedValues,
-      SparseSolver<LatticeKey, LatticeVal, KeyInfo> &SS) = 0;
+  virtual void
+  ComputeInstructionState(Instruction &I,
+                          DenseMap<LatticeKey, LatticeVal> &ChangedValues,
+                          SparseSolver<LatticeKey, LatticeVal, KeyInfo> &SS) = 0;
 
   /// PrintLatticeVal - Render the given LatticeVal to the specified stream.
   virtual void PrintLatticeVal(LatticeVal LV, raw_ostream &OS);
@@ -400,7 +401,7 @@ void SparseSolver<LatticeKey, LatticeVal, KeyInfo>::visitPHINode(PHINode &PN) {
   // computed from its incoming values. For example, SSI form stores its sigma
   // functions as PHINodes with a single incoming value.
if (LatticeFunc->IsSpecialCasedPHI(&PN)) { - SmallDenseMap ChangedValues; + DenseMap ChangedValues; LatticeFunc->ComputeInstructionState(PN, ChangedValues, *this); for (auto &ChangedValue : ChangedValues) if (ChangedValue.second != LatticeFunc->getUntrackedVal()) @@ -455,7 +456,7 @@ void SparseSolver::visitInst(Instruction &I) { // Otherwise, ask the transfer function what the result is. If this is // something that we care about, remember it. - SmallDenseMap ChangedValues; + DenseMap ChangedValues; LatticeFunc->ComputeInstructionState(I, ChangedValues, *this); for (auto &ChangedValue : ChangedValues) if (ChangedValue.second != LatticeFunc->getUntrackedVal()) diff --git a/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp b/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp index c5fba184cd085..79504ca7b73c8 100644 --- a/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp +++ b/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp @@ -888,7 +888,7 @@ void MemoryDependenceResults::getNonLocalPointerDependency( // each block. Because of critical edges, we currently bail out if querying // a block with multiple different pointers. This can happen during PHI // translation. - SmallDenseMap Visited; + DenseMap Visited; if (getNonLocalPointerDepFromBB(QueryInst, Address, Loc, isLoad, FromBB, Result, Visited, true)) return; @@ -1038,7 +1038,7 @@ bool MemoryDependenceResults::getNonLocalPointerDepFromBB( Instruction *QueryInst, const PHITransAddr &Pointer, const MemoryLocation &Loc, bool isLoad, BasicBlock *StartBB, SmallVectorImpl &Result, - SmallDenseMap &Visited, bool SkipFirstBlock, + DenseMap &Visited, bool SkipFirstBlock, bool IsIncomplete) { // Look up the cached info for Pointer. ValueIsLoadPair CacheKey(Pointer.getAddr(), isLoad); diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp index 09e5c080c19cf..233f8edca5b13 100644 --- a/llvm/lib/Analysis/ScalarEvolution.cpp +++ b/llvm/lib/Analysis/ScalarEvolution.cpp @@ -2255,7 +2255,7 @@ const SCEV *ScalarEvolution::getAnyExtendExpr(const SCEV *Op, /// the common case where no interesting opportunities are present, and /// is also used as a check to avoid infinite recursion. static bool -CollectAddOperandsWithScales(SmallDenseMap &M, +CollectAddOperandsWithScales(DenseMap &M, SmallVectorImpl &NewOps, APInt &AccumulatedConstant, ArrayRef Ops, const APInt &Scale, @@ -2753,7 +2753,7 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl &Ops, // operands multiplied by constant values. 
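  // (Editorial example, not in the LLVM source: given Ops = {2*x, 4*y, 2*x},
  // CollectAddOperandsWithScales builds the scale map {x -> 4, y -> 4}, so a
  // duplicated scaled operand collapses into a single multiply per unique
  // operand in the folding below.)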
if (Idx < Ops.size() && isa(Ops[Idx])) { uint64_t BitWidth = getTypeSizeInBits(Ty); - SmallDenseMap M; + DenseMap M; SmallVector NewOps; APInt AccumulatedConstant(BitWidth, 0); if (CollectAddOperandsWithScales(M, NewOps, AccumulatedConstant, diff --git a/llvm/lib/CodeGen/CalcSpillWeights.cpp b/llvm/lib/CodeGen/CalcSpillWeights.cpp index 88ed2291313c9..9d8c9119f7719 100644 --- a/llvm/lib/CodeGen/CalcSpillWeights.cpp +++ b/llvm/lib/CodeGen/CalcSpillWeights.cpp @@ -222,7 +222,7 @@ float VirtRegAuxInfo::weightCalcHelper(LiveInterval &LI, SlotIndex *Start, bool IsExiting = false; std::set CopyHints; - SmallDenseMap Hint; + DenseMap Hint; for (MachineRegisterInfo::reg_instr_nodbg_iterator I = MRI.reg_instr_nodbg_begin(LI.reg()), E = MRI.reg_instr_nodbg_end(); diff --git a/llvm/lib/CodeGen/MachineLICM.cpp b/llvm/lib/CodeGen/MachineLICM.cpp index 3289a692221ba..6768eeeb4364c 100644 --- a/llvm/lib/CodeGen/MachineLICM.cpp +++ b/llvm/lib/CodeGen/MachineLICM.cpp @@ -239,7 +239,7 @@ namespace { bool IsCheapInstruction(MachineInstr &MI) const; - bool CanCauseHighRegPressure(const SmallDenseMap &Cost, + bool CanCauseHighRegPressure(const DenseMap &Cost, bool Cheap); void UpdateBackTraceRegPressure(const MachineInstr *MI); @@ -264,9 +264,9 @@ namespace { void InitRegPressure(MachineBasicBlock *BB); - SmallDenseMap calcRegisterCost(const MachineInstr *MI, - bool ConsiderSeen, - bool ConsiderUnseenAsDef); + DenseMap calcRegisterCost(const MachineInstr *MI, + bool ConsiderSeen, + bool ConsiderUnseenAsDef); void UpdateRegPressure(const MachineInstr *MI, bool ConsiderUnseenAsDef = false); @@ -977,10 +977,10 @@ void MachineLICMImpl::UpdateRegPressure(const MachineInstr *MI, /// If 'ConsiderSeen' is true, updates 'RegSeen' and uses the information to /// figure out which usages are live-ins. /// FIXME: Figure out a way to consider 'RegSeen' from all code paths. -SmallDenseMap +DenseMap MachineLICMImpl::calcRegisterCost(const MachineInstr *MI, bool ConsiderSeen, bool ConsiderUnseenAsDef) { - SmallDenseMap Cost; + DenseMap Cost; if (MI->isImplicitDef()) return Cost; for (unsigned i = 0, e = MI->getDesc().getNumOperands(); i != e; ++i) { @@ -1248,7 +1248,7 @@ bool MachineLICMImpl::IsCheapInstruction(MachineInstr &MI) const { /// Visit BBs from header to current BB, check if hoisting an instruction of the /// given cost matrix can cause high register pressure. bool MachineLICMImpl::CanCauseHighRegPressure( - const SmallDenseMap &Cost, bool CheapInstr) { + const DenseMap &Cost, bool CheapInstr) { for (const auto &RPIdAndCost : Cost) { if (RPIdAndCost.second <= 0) continue; diff --git a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp index 12a48ab06f1c0..53ce21906204c 100644 --- a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp @@ -82,7 +82,8 @@ static unsigned countOperands(SDNode *Node, unsigned NumExpUses, /// EmitCopyFromReg - Generate machine code for an CopyFromReg node or an /// implicit physical register output. void InstrEmitter::EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, - Register SrcReg, VRBaseMapType &VRBaseMap) { + Register SrcReg, + DenseMap &VRBaseMap) { Register VRBase; if (SrcReg.isVirtual()) { // Just use the input register directly! 
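(Editorial aside, not part of the diff: a minimal sketch of the trade-off the reverted change was exploiting. `demo` is a hypothetical function; only `llvm/ADT/DenseMap.h` is assumed.)

```cpp
#include "llvm/ADT/DenseMap.h"

void demo() {
  // SmallDenseMap<K, V, N> keeps its first N buckets in inline storage,
  // so a map that stays small never allocates from the heap.
  llvm::SmallDenseMap<unsigned, int, 16> Small;
  Small[7] = 42; // still using the inline buckets

  // DenseMap allocates its bucket array from the heap on first insertion,
  // which is the cost the original patch was avoiding on hot paths.
  llvm::DenseMap<unsigned, int> Big;
  Big[7] = 42; // triggers a heap allocation for the bucket array
}
```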
@@ -186,7 +187,7 @@ void InstrEmitter::CreateVirtualRegisters(SDNode *Node, MachineInstrBuilder &MIB, const MCInstrDesc &II, bool IsClone, bool IsCloned, - VRBaseMapType &VRBaseMap) { + DenseMap &VRBaseMap) { assert(Node->getMachineOpcode() != TargetOpcode::IMPLICIT_DEF && "IMPLICIT_DEF should have been handled as a special case elsewhere!"); @@ -264,7 +265,8 @@ void InstrEmitter::CreateVirtualRegisters(SDNode *Node, /// getVR - Return the virtual register corresponding to the specified result /// of the specified node. -Register InstrEmitter::getVR(SDValue Op, VRBaseMapType &VRBaseMap) { +Register InstrEmitter::getVR(SDValue Op, + DenseMap &VRBaseMap) { if (Op.isMachineOpcode() && Op.getMachineOpcode() == TargetOpcode::IMPLICIT_DEF) { // Add an IMPLICIT_DEF instruction before every use. @@ -278,7 +280,7 @@ Register InstrEmitter::getVR(SDValue Op, VRBaseMapType &VRBaseMap) { return VReg; } - VRBaseMapType::iterator I = VRBaseMap.find(Op); + DenseMap::iterator I = VRBaseMap.find(Op); assert(I != VRBaseMap.end() && "Node emitted out of order - late"); return I->second; } @@ -316,7 +318,7 @@ InstrEmitter::AddRegisterOperand(MachineInstrBuilder &MIB, SDValue Op, unsigned IIOpNum, const MCInstrDesc *II, - VRBaseMapType &VRBaseMap, + DenseMap &VRBaseMap, bool IsDebug, bool IsClone, bool IsCloned) { assert(Op.getValueType() != MVT::Other && Op.getValueType() != MVT::Glue && @@ -393,10 +395,12 @@ InstrEmitter::AddRegisterOperand(MachineInstrBuilder &MIB, /// AddOperand - Add the specified operand to the specified machine instr. II /// specifies the instruction information for the node, and IIOpNum is the /// operand number (in the II) that we are adding. -void InstrEmitter::AddOperand(MachineInstrBuilder &MIB, SDValue Op, - unsigned IIOpNum, const MCInstrDesc *II, - VRBaseMapType &VRBaseMap, bool IsDebug, - bool IsClone, bool IsCloned) { +void InstrEmitter::AddOperand(MachineInstrBuilder &MIB, + SDValue Op, + unsigned IIOpNum, + const MCInstrDesc *II, + DenseMap &VRBaseMap, + bool IsDebug, bool IsClone, bool IsCloned) { if (Op.isMachineOpcode()) { AddRegisterOperand(MIB, Op, IIOpNum, II, VRBaseMap, IsDebug, IsClone, IsCloned); @@ -495,7 +499,8 @@ Register InstrEmitter::ConstrainForSubReg(Register VReg, unsigned SubIdx, /// EmitSubregNode - Generate machine code for subreg nodes. /// -void InstrEmitter::EmitSubregNode(SDNode *Node, VRBaseMapType &VRBaseMap, +void InstrEmitter::EmitSubregNode(SDNode *Node, + DenseMap &VRBaseMap, bool IsClone, bool IsCloned) { Register VRBase; unsigned Opc = Node->getMachineOpcode(); @@ -629,7 +634,7 @@ void InstrEmitter::EmitSubregNode(SDNode *Node, VRBaseMapType &VRBaseMap, /// void InstrEmitter::EmitCopyToRegClassNode(SDNode *Node, - VRBaseMapType &VRBaseMap) { + DenseMap &VRBaseMap) { Register VReg = getVR(Node->getOperand(0), VRBaseMap); // Create the new VReg in the destination class and emit a copy. @@ -648,8 +653,9 @@ InstrEmitter::EmitCopyToRegClassNode(SDNode *Node, /// EmitRegSequence - Generate machine code for REG_SEQUENCE nodes. 
/// -void InstrEmitter::EmitRegSequence(SDNode *Node, VRBaseMapType &VRBaseMap, - bool IsClone, bool IsCloned) { +void InstrEmitter::EmitRegSequence(SDNode *Node, + DenseMap &VRBaseMap, + bool IsClone, bool IsCloned) { unsigned DstRCIdx = Node->getConstantOperandVal(0); const TargetRegisterClass *RC = TRI->getRegClass(DstRCIdx); Register NewVReg = MRI->createVirtualRegister(TRI->getAllocatableClass(RC)); @@ -697,7 +703,7 @@ void InstrEmitter::EmitRegSequence(SDNode *Node, VRBaseMapType &VRBaseMap, /// MachineInstr * InstrEmitter::EmitDbgValue(SDDbgValue *SD, - VRBaseMapType &VRBaseMap) { + DenseMap &VRBaseMap) { DebugLoc DL = SD->getDebugLoc(); assert(cast(SD->getVariable()) ->isValidLocationForIntrinsic(DL) && @@ -749,7 +755,7 @@ MachineOperand GetMOForConstDbgOp(const SDDbgOperand &Op) { void InstrEmitter::AddDbgValueLocationOps( MachineInstrBuilder &MIB, const MCInstrDesc &DbgValDesc, ArrayRef LocationOps, - VRBaseMapType &VRBaseMap) { + DenseMap &VRBaseMap) { for (const SDDbgOperand &Op : LocationOps) { switch (Op.getKind()) { case SDDbgOperand::FRAMEIX: @@ -780,7 +786,7 @@ void InstrEmitter::AddDbgValueLocationOps( MachineInstr * InstrEmitter::EmitDbgInstrRef(SDDbgValue *SD, - VRBaseMapType &VRBaseMap) { + DenseMap &VRBaseMap) { MDNode *Var = SD->getVariable(); const DIExpression *Expr = (DIExpression *)SD->getExpression(); DebugLoc DL = SD->getDebugLoc(); @@ -856,7 +862,7 @@ InstrEmitter::EmitDbgInstrRef(SDDbgValue *SD, // Look up the corresponding VReg for the given SDNode, if any. SDNode *Node = DbgOperand.getSDNode(); SDValue Op = SDValue(Node, DbgOperand.getResNo()); - VRBaseMapType::iterator I = VRBaseMap.find(Op); + DenseMap::iterator I = VRBaseMap.find(Op); // No VReg -> produce a DBG_VALUE $noreg instead. if (I == VRBaseMap.end()) break; @@ -922,7 +928,7 @@ MachineInstr *InstrEmitter::EmitDbgNoLocation(SDDbgValue *SD) { MachineInstr * InstrEmitter::EmitDbgValueList(SDDbgValue *SD, - VRBaseMapType &VRBaseMap) { + DenseMap &VRBaseMap) { MDNode *Var = SD->getVariable(); DIExpression *Expr = SD->getExpression(); DebugLoc DL = SD->getDebugLoc(); @@ -938,7 +944,7 @@ InstrEmitter::EmitDbgValueList(SDDbgValue *SD, MachineInstr * InstrEmitter::EmitDbgValueFromSingleOp(SDDbgValue *SD, - VRBaseMapType &VRBaseMap) { + DenseMap &VRBaseMap) { MDNode *Var = SD->getVariable(); DIExpression *Expr = SD->getExpression(); DebugLoc DL = SD->getDebugLoc(); @@ -990,7 +996,7 @@ InstrEmitter::EmitDbgLabel(SDDbgLabel *SD) { /// void InstrEmitter:: EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned, - VRBaseMapType &VRBaseMap) { + DenseMap &VRBaseMap) { unsigned Opc = Node->getMachineOpcode(); // Handle subreg insert/extract specially @@ -1232,7 +1238,7 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned, /// needed dependencies. 
void InstrEmitter:: EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned, - VRBaseMapType &VRBaseMap) { + DenseMap &VRBaseMap) { switch (Node->getOpcode()) { default: #ifndef NDEBUG diff --git a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h index 16d754cdc2338..959bce31c8b27 100644 --- a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h +++ b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h @@ -30,10 +30,6 @@ class TargetLowering; class TargetMachine; class LLVM_LIBRARY_VISIBILITY InstrEmitter { -public: - using VRBaseMapType = SmallDenseMap; - -private: MachineFunction *MF; MachineRegisterInfo *MRI; const TargetInstrInfo *TII; @@ -49,17 +45,18 @@ class LLVM_LIBRARY_VISIBILITY InstrEmitter { /// EmitCopyFromReg - Generate machine code for an CopyFromReg node or an /// implicit physical register output. void EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, - Register SrcReg, VRBaseMapType &VRBaseMap); + Register SrcReg, DenseMap &VRBaseMap); void CreateVirtualRegisters(SDNode *Node, MachineInstrBuilder &MIB, const MCInstrDesc &II, bool IsClone, bool IsCloned, - VRBaseMapType &VRBaseMap); + DenseMap &VRBaseMap); /// getVR - Return the virtual register corresponding to the specified result /// of the specified node. - Register getVR(SDValue Op, VRBaseMapType &VRBaseMap); + Register getVR(SDValue Op, + DenseMap &VRBaseMap); /// AddRegisterOperand - Add the specified register as an operand to the /// specified machine instr. Insert register copies if the register is @@ -68,7 +65,7 @@ class LLVM_LIBRARY_VISIBILITY InstrEmitter { SDValue Op, unsigned IIOpNum, const MCInstrDesc *II, - VRBaseMapType &VRBaseMap, + DenseMap &VRBaseMap, bool IsDebug, bool IsClone, bool IsCloned); /// AddOperand - Add the specified operand to the specified machine instr. II @@ -79,7 +76,7 @@ class LLVM_LIBRARY_VISIBILITY InstrEmitter { SDValue Op, unsigned IIOpNum, const MCInstrDesc *II, - VRBaseMapType &VRBaseMap, + DenseMap &VRBaseMap, bool IsDebug, bool IsClone, bool IsCloned); /// ConstrainForSubReg - Try to constrain VReg to a register class that @@ -90,20 +87,20 @@ class LLVM_LIBRARY_VISIBILITY InstrEmitter { /// EmitSubregNode - Generate machine code for subreg nodes. /// - void EmitSubregNode(SDNode *Node, VRBaseMapType &VRBaseMap, bool IsClone, - bool IsCloned); + void EmitSubregNode(SDNode *Node, DenseMap &VRBaseMap, + bool IsClone, bool IsCloned); /// EmitCopyToRegClassNode - Generate machine code for COPY_TO_REGCLASS nodes. /// COPY_TO_REGCLASS is just a normal copy, except that the destination /// register is constrained to be in a particular register class. /// - void EmitCopyToRegClassNode(SDNode *Node, VRBaseMapType &VRBaseMap); + void EmitCopyToRegClassNode(SDNode *Node, + DenseMap &VRBaseMap); /// EmitRegSequence - Generate machine code for REG_SEQUENCE nodes. /// - void EmitRegSequence(SDNode *Node, VRBaseMapType &VRBaseMap, bool IsClone, - bool IsCloned); - + void EmitRegSequence(SDNode *Node, DenseMap &VRBaseMap, + bool IsClone, bool IsCloned); public: /// CountResults - The results of target nodes have register or immediate /// operands first, then an optional chain, and optional flag operands @@ -113,26 +110,29 @@ class LLVM_LIBRARY_VISIBILITY InstrEmitter { void AddDbgValueLocationOps(MachineInstrBuilder &MIB, const MCInstrDesc &DbgValDesc, ArrayRef Locations, - VRBaseMapType &VRBaseMap); + DenseMap &VRBaseMap); /// EmitDbgValue - Generate machine instruction for a dbg_value node. 
/// - MachineInstr *EmitDbgValue(SDDbgValue *SD, VRBaseMapType &VRBaseMap); + MachineInstr *EmitDbgValue(SDDbgValue *SD, + DenseMap &VRBaseMap); /// Emit a dbg_value as a DBG_INSTR_REF. May produce DBG_VALUE $noreg instead /// if there is no variable location; alternately a half-formed DBG_INSTR_REF /// that refers to a virtual register and is corrected later in isel. - MachineInstr *EmitDbgInstrRef(SDDbgValue *SD, VRBaseMapType &VRBaseMap); + MachineInstr *EmitDbgInstrRef(SDDbgValue *SD, + DenseMap &VRBaseMap); /// Emit a DBG_VALUE $noreg, indicating a variable has no location. MachineInstr *EmitDbgNoLocation(SDDbgValue *SD); /// Emit a DBG_VALUE_LIST from the operands to SDDbgValue. - MachineInstr *EmitDbgValueList(SDDbgValue *SD, VRBaseMapType &VRBaseMap); + MachineInstr *EmitDbgValueList(SDDbgValue *SD, + DenseMap &VRBaseMap); /// Emit a DBG_VALUE from the operands to SDDbgValue. MachineInstr *EmitDbgValueFromSingleOp(SDDbgValue *SD, - VRBaseMapType &VRBaseMap); + DenseMap &VRBaseMap); /// Generate machine instruction for a dbg_label node. MachineInstr *EmitDbgLabel(SDDbgLabel *SD); @@ -140,7 +140,7 @@ class LLVM_LIBRARY_VISIBILITY InstrEmitter { /// EmitNode - Generate machine code for a node and needed dependencies. /// void EmitNode(SDNode *Node, bool IsClone, bool IsCloned, - VRBaseMapType &VRBaseMap) { + DenseMap &VRBaseMap) { if (Node->isMachineOpcode()) EmitMachineNode(Node, IsClone, IsCloned, VRBaseMap); else @@ -160,9 +160,9 @@ class LLVM_LIBRARY_VISIBILITY InstrEmitter { private: void EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned, - VRBaseMapType &VRBaseMap); + DenseMap &VRBaseMap); void EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned, - VRBaseMapType &VRBaseMap); + DenseMap &VRBaseMap); }; } // namespace llvm diff --git a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp index 70a7438440191..de4a1ac2a3baf 100644 --- a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp @@ -770,7 +770,7 @@ void ScheduleDAGLinearize::Schedule() { MachineBasicBlock* ScheduleDAGLinearize::EmitSchedule(MachineBasicBlock::iterator &InsertPos) { InstrEmitter Emitter(DAG->getTarget(), BB, InsertPos); - InstrEmitter::VRBaseMapType VRBaseMap; + DenseMap VRBaseMap; LLVM_DEBUG({ dbgs() << "\n*** Final schedule ***\n"; }); diff --git a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp index 31939ae5922ec..53dd71d173473 100644 --- a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp @@ -737,7 +737,7 @@ void ScheduleDAGSDNodes::VerifyScheduledSequence(bool isBottomUp) { static void ProcessSDDbgValues(SDNode *N, SelectionDAG *DAG, InstrEmitter &Emitter, SmallVectorImpl > &Orders, - InstrEmitter::VRBaseMapType &VRBaseMap, unsigned Order) { + DenseMap &VRBaseMap, unsigned Order) { if (!N->getHasDebugValue()) return; @@ -782,7 +782,7 @@ ProcessSDDbgValues(SDNode *N, SelectionDAG *DAG, InstrEmitter &Emitter, // instructions in the right order. 
static void ProcessSourceNode(SDNode *N, SelectionDAG *DAG, InstrEmitter &Emitter, - InstrEmitter::VRBaseMapType &VRBaseMap, + DenseMap &VRBaseMap, SmallVectorImpl> &Orders, SmallSet &Seen, MachineInstr *NewInsn) { unsigned Order = N->getIROrder(); @@ -808,7 +808,7 @@ ProcessSourceNode(SDNode *N, SelectionDAG *DAG, InstrEmitter &Emitter, } void ScheduleDAGSDNodes:: -EmitPhysRegCopy(SUnit *SU, SmallDenseMap &VRBaseMap, +EmitPhysRegCopy(SUnit *SU, DenseMap &VRBaseMap, MachineBasicBlock::iterator InsertPos) { for (const SDep &Pred : SU->Preds) { if (Pred.isCtrl()) @@ -851,8 +851,8 @@ EmitPhysRegCopy(SUnit *SU, SmallDenseMap &VRBaseMap, MachineBasicBlock *ScheduleDAGSDNodes:: EmitSchedule(MachineBasicBlock::iterator &InsertPos) { InstrEmitter Emitter(DAG->getTarget(), BB, InsertPos); - InstrEmitter::VRBaseMapType VRBaseMap; - SmallDenseMap CopyVRBaseMap; + DenseMap VRBaseMap; + DenseMap CopyVRBaseMap; SmallVector, 32> Orders; SmallSet Seen; bool HasDbg = DAG->hasDebugValues(); @@ -861,7 +861,7 @@ EmitSchedule(MachineBasicBlock::iterator &InsertPos) { // Zero, one, or multiple instructions can be created when emitting a node. auto EmitNode = [&](SDNode *Node, bool IsClone, bool IsCloned, - InstrEmitter::VRBaseMapType &VRBaseMap) -> MachineInstr * { + DenseMap &VRBaseMap) -> MachineInstr * { // Fetch instruction prior to this, or end() if nonexistant. auto GetPrevInsn = [&](MachineBasicBlock::iterator I) { if (I == BB->begin()) diff --git a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h index b7d25c6ccc9b0..446df640821d8 100644 --- a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h +++ b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h @@ -184,8 +184,7 @@ class InstrItineraryData; void BuildSchedUnits(); void AddSchedEdges(); - void EmitPhysRegCopy(SUnit *SU, - SmallDenseMap &VRBaseMap, + void EmitPhysRegCopy(SUnit *SU, DenseMap &VRBaseMap, MachineBasicBlock::iterator InsertPos); }; diff --git a/llvm/lib/Transforms/IPO/CalledValuePropagation.cpp b/llvm/lib/Transforms/IPO/CalledValuePropagation.cpp index 66ae0706d638c..acc10f57c29ac 100644 --- a/llvm/lib/Transforms/IPO/CalledValuePropagation.cpp +++ b/llvm/lib/Transforms/IPO/CalledValuePropagation.cpp @@ -169,8 +169,7 @@ class CVPLatticeFunc /// just a few kinds of instructions since we're only propagating values that /// can be called. void ComputeInstructionState( - Instruction &I, - SmallDenseMap &ChangedValues, + Instruction &I, DenseMap &ChangedValues, SparseSolver &SS) override { switch (I.getOpcode()) { case Instruction::Call: @@ -239,10 +238,9 @@ class CVPLatticeFunc /// Handle return instructions. The function's return state is the merge of /// the returned value state and the function's return state. - void - visitReturn(ReturnInst &I, - SmallDenseMap &ChangedValues, - SparseSolver &SS) { + void visitReturn(ReturnInst &I, + DenseMap &ChangedValues, + SparseSolver &SS) { Function *F = I.getParent()->getParent(); if (F->getReturnType()->isVoidTy()) return; @@ -256,10 +254,9 @@ class CVPLatticeFunc /// the merge of the argument state with the call sites corresponding actual /// argument state. The call site state is the merge of the call site state /// with the returned value state of the called function. 
- void - visitCallBase(CallBase &CB, - SmallDenseMap &ChangedValues, - SparseSolver &SS) { + void visitCallBase(CallBase &CB, + DenseMap &ChangedValues, + SparseSolver &SS) { Function *F = CB.getCalledFunction(); auto RegI = CVPLatticeKey(&CB, IPOGrouping::Register); @@ -301,10 +298,9 @@ class CVPLatticeFunc /// Handle select instructions. The select instruction state is the merge the /// true and false value states. - void - visitSelect(SelectInst &I, - SmallDenseMap &ChangedValues, - SparseSolver &SS) { + void visitSelect(SelectInst &I, + DenseMap &ChangedValues, + SparseSolver &SS) { auto RegI = CVPLatticeKey(&I, IPOGrouping::Register); auto RegT = CVPLatticeKey(I.getTrueValue(), IPOGrouping::Register); auto RegF = CVPLatticeKey(I.getFalseValue(), IPOGrouping::Register); @@ -316,7 +312,7 @@ class CVPLatticeFunc /// variable, we attempt to track the value. The loaded value state is the /// merge of the loaded value state with the global variable state. void visitLoad(LoadInst &I, - SmallDenseMap &ChangedValues, + DenseMap &ChangedValues, SparseSolver &SS) { auto RegI = CVPLatticeKey(&I, IPOGrouping::Register); if (auto *GV = dyn_cast(I.getPointerOperand())) { @@ -331,10 +327,9 @@ class CVPLatticeFunc /// Handle store instructions. If the pointer operand of the store is a /// global variable, we attempt to track the value. The global variable state /// is the merge of the stored value state with the global variable state. - void - visitStore(StoreInst &I, - SmallDenseMap &ChangedValues, - SparseSolver &SS) { + void visitStore(StoreInst &I, + DenseMap &ChangedValues, + SparseSolver &SS) { auto *GV = dyn_cast(I.getPointerOperand()); if (!GV) return; @@ -347,7 +342,7 @@ class CVPLatticeFunc /// Handle all other instructions. All other instructions are marked /// overdefined. void visitInst(Instruction &I, - SmallDenseMap &ChangedValues, + DenseMap &ChangedValues, SparseSolver &SS) { // Simply bail if this instruction has no user. if (I.use_empty()) diff --git a/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp b/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp index 7bffd4da75a5b..4144c7993b7e4 100644 --- a/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp +++ b/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp @@ -503,8 +503,7 @@ static bool removeRedundantDbgInstrsUsingBackwardScan(BasicBlock *BB) { static bool DbgVariableRecordsRemoveRedundantDbgInstrsUsingForwardScan(BasicBlock *BB) { SmallVector ToBeRemoved; - SmallDenseMap, DIExpression *>, 4> + DenseMap, DIExpression *>> VariableMap; for (auto &I : *BB) { for (DbgVariableRecord &DVR : filterDbgVars(I.getDbgRecordRange())) { @@ -585,8 +584,7 @@ static bool removeRedundantDbgInstrsUsingForwardScan(BasicBlock *BB) { return DbgVariableRecordsRemoveRedundantDbgInstrsUsingForwardScan(BB); SmallVector ToBeRemoved; - SmallDenseMap, DIExpression *>, 4> + DenseMap, DIExpression *>> VariableMap; for (auto &I : *BB) { if (DbgValueInst *DVI = dyn_cast(&I)) { diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index cc02d8237db63..414c6388c777b 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -7512,7 +7512,7 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, auto TryToFindDuplicates = [&](const InstructionsState &S, bool DoNotFail = false) { // Check that every instruction appears once in this bundle. 
-    SmallDenseMap<Value *, unsigned, 16> UniquePositions(VL.size());
+    DenseMap<Value *, unsigned> UniquePositions(VL.size());
     for (Value *V : VL) {
       if (isConstant(V)) {
         ReuseShuffleIndices.emplace_back(
@@ -18383,8 +18383,7 @@ class HorizontalReduction {
     for (Value *V : Candidates)
       TrackedVals.try_emplace(V, V);
 
-    auto At = [](SmallMapVector<Value *, unsigned, 16> &MV,
-                 Value *V) -> unsigned & {
+    auto At = [](MapVector<Value *, unsigned> &MV, Value *V) -> unsigned & {
       auto *It = MV.find(V);
       assert(It != MV.end() && "Unable to find given key.");
       return It->second;
@@ -18471,7 +18470,7 @@ class HorizontalReduction {
         RdxKind != RecurKind::FMul && RdxKind != RecurKind::FMulAdd;
     // Gather same values.
-    SmallMapVector<Value *, unsigned, 16> SameValuesCounter;
+    MapVector<Value *, unsigned> SameValuesCounter;
     if (IsSupportedHorRdxIdentityOp)
       for (Value *V : Candidates) {
         Value *OrigV = TrackedToOrig.at(V);
@@ -19090,10 +19089,10 @@ class HorizontalReduction {
   /// Emits actual operation for the scalar identity values, found during
   /// horizontal reduction analysis.
-  Value *
-  emitReusedOps(Value *VectorizedValue, IRBuilderBase &Builder, BoUpSLP &R,
-                const SmallMapVector<Value *, unsigned, 16> &SameValuesCounter,
-                const DenseMap<Value *, Value *> &TrackedToOrig) {
+  Value *emitReusedOps(Value *VectorizedValue, IRBuilderBase &Builder,
+                       BoUpSLP &R,
+                       const MapVector<Value *, unsigned> &SameValuesCounter,
+                       const DenseMap<Value *, Value *> &TrackedToOrig) {
     assert(IsSupportedHorRdxIdentityOp &&
            "The optimization of matched scalar identity horizontal reductions "
            "must be supported.");

From 02c138f8d1d6ca7152823d44ad5709d13bcd06ee Mon Sep 17 00:00:00 2001
From: Lukacma
Date: Wed, 25 Sep 2024 14:34:00 +0100
Subject: [PATCH 165/212] [AArch64] Implement intrinsics for SME2 FSCALE (#100128)

This patch implements these intrinsics:

FSCALE SINGLE AND MULTI
```
// Variants are also available for:
// [_single_f32_x2], [_single_f64_x2],
// [_single_f16_x4], [_single_f32_x4], [_single_f64_x4]
svfloat16x2_t svscale[_single_f16_x2](svfloat16x2_t zd, svint16_t zm) __arm_streaming;

// Variants are also available for:
// [_f32_x2], [_f64_x2],
// [_f16_x4], [_f32_x4], [_f64_x4]
svfloat16x2_t svscale[_f16_x2](svfloat16x2_t zd, svint16x2_t zm) __arm_streaming;
```
(cf. https://github.com/ARM-software/acle/pull/323)
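A usage sketch (editorial addition, not part of the original commit; `scale_pair` is a hypothetical name, and the non-overloaded spelling `svscale_single_f16_x2` is taken from the tests added below):

```c
// Editorial sketch only: scale each lane of a pair of f16 vectors by an
// integer power of two, using the single-vector-of-exponents form.
#include <arm_sme.h>

svfloat16x2_t scale_pair(svfloat16x2_t vals, svint16_t exponents)
    __arm_streaming {
  // FSCALE semantics: every element vals[i] becomes vals[i] * 2^exponents[i].
  return svscale_single_f16_x2(vals, exponents);
}
```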
Co-authored-by: Caroline Concatto
---
 clang/include/clang/Basic/arm_sve.td          |  10 +
 .../acle_sme2_fp8_scale.c                     | 452 ++++++++++++++++++
 llvm/include/llvm/IR/IntrinsicsAArch64.td     |  25 +
 .../Target/AArch64/AArch64ISelDAGToDAG.cpp    |  28 ++
 .../CodeGen/AArch64/sme2-intrinsics-fscale.ll | 186 +++++++
 5 files changed, 701 insertions(+)
 create mode 100644 clang/test/CodeGen/aarch64-fp8-intrinsics/acle_sme2_fp8_scale.c
 create mode 100644 llvm/test/CodeGen/AArch64/sme2-intrinsics-fscale.ll

diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td
index edf73d9022b06..da496e30fbb52 100644
--- a/clang/include/clang/Basic/arm_sve.td
+++ b/clang/include/clang/Basic/arm_sve.td
@@ -2418,6 +2418,16 @@ let SVETargetGuard = InvalidMode, SMETargetGuard = "sme2" in {
   def SVUUNPK_X4 : SInst<"svunpk_{d}[_{3}_x4]", "42.h", "UsUiUl", MergeNone, "aarch64_sve_uunpk_x4", [IsStreaming], []>;
 }
 
+//
+// Multi-vector scaling
+//
+let SVETargetGuard = InvalidMode, SMETargetGuard = "sme2,fp8" in {
+  def FSCALE_SINGLE_X2 : Inst<"svscale[_single_{d}_x2]", "22x", "fhd", MergeNone, "aarch64_sme_fp8_scale_single_x2", [IsStreaming],[]>;
+  def FSCALE_SINGLE_X4 : Inst<"svscale[_single_{d}_x4]", "44x", "fhd", MergeNone, "aarch64_sme_fp8_scale_single_x4", [IsStreaming],[]>;
+  def FSCALE_X2 : Inst<"svscale[_{d}_x2]", "222.x", "fhd", MergeNone, "aarch64_sme_fp8_scale_x2", [IsStreaming],[]>;
+  def FSCALE_X4 : Inst<"svscale[_{d}_x4]", "444.x", "fhd", MergeNone, "aarch64_sme_fp8_scale_x4", [IsStreaming],[]>;
+}
+
 let SVETargetGuard = "sve2p1", SMETargetGuard = "sme2" in {
 // == BFloat16 multiply-subtract ==
   def SVBFMLSLB : SInst<"svbfmlslb[_{d}]", "dd$$", "f", MergeNone, "aarch64_sve_bfmlslb", [IsOverloadNone, VerifyRuntimeMode], []>;
diff --git a/clang/test/CodeGen/aarch64-fp8-intrinsics/acle_sme2_fp8_scale.c b/clang/test/CodeGen/aarch64-fp8-intrinsics/acle_sme2_fp8_scale.c
new file mode 100644
index 0000000000000..b733e772ba307
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-fp8-intrinsics/acle_sme2_fp8_scale.c
@@ -0,0 +1,452 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// REQUIRES: aarch64-registered-target
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2 -target-feature +fp8 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2 -target-feature +fp8 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2 -target-feature +fp8 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2 -target-feature +fp8 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2 -target-feature +fp8 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+#include <arm_sme.h>
+
+#ifdef SVE_OVERLOADED_FORMS
+// A simple used,unused... macro, long enough to represent any SVE builtin.
+#define SVE_ACLE_FUNC(A1,A2_UNUSED) A1 +#else +#define SVE_ACLE_FUNC(A1,A2) A1##A2 +#endif + + +// Single x2 +// CHECK-LABEL: @test_svscale_single_f16_x2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[OP1:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[OP1]], i64 8) +// CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sme.fp8.scale.single.x2.nxv8f16( [[TMP0]], [[TMP1]], [[OP2:%.*]]) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP3]], i64 0) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP4]], [[TMP5]], i64 8) +// CHECK-NEXT: ret [[TMP6]] +// +// CPP-CHECK-LABEL: @_Z26test_svscale_single_f16_x213svfloat16x2_tu11__SVInt16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[OP1:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[OP1]], i64 8) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sme.fp8.scale.single.x2.nxv8f16( [[TMP0]], [[TMP1]], [[OP2:%.*]]) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP3]], i64 0) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP4]], [[TMP5]], i64 8) +// CPP-CHECK-NEXT: ret [[TMP6]] +// +svfloat16x2_t test_svscale_single_f16_x2(svfloat16x2_t op1, svint16_t op2) __arm_streaming +{ + return SVE_ACLE_FUNC(svscale,_single_f16_x2)(op1, op2); +} + +// CHECK-LABEL: @test_svscale_single_f32_x2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[OP1:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[OP1]], i64 4) +// CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sme.fp8.scale.single.x2.nxv4f32( [[TMP0]], [[TMP1]], [[OP2:%.*]]) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP3]], i64 0) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP4]], [[TMP5]], i64 4) +// CHECK-NEXT: ret [[TMP6]] +// +// CPP-CHECK-LABEL: @_Z26test_svscale_single_f32_x213svfloat32x2_tu11__SVInt32_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[OP1:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[OP1]], i64 4) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sme.fp8.scale.single.x2.nxv4f32( [[TMP0]], [[TMP1]], [[OP2:%.*]]) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP3]], i64 0) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP4]], [[TMP5]], i64 4) +// CPP-CHECK-NEXT: ret [[TMP6]] +// +svfloat32x2_t test_svscale_single_f32_x2(svfloat32x2_t op1, svint32_t op2) __arm_streaming +{ + return 
SVE_ACLE_FUNC(svscale,_single_f32_x2)(op1, op2); +} + +// CHECK-LABEL: @test_svscale_single_f64_x2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[OP1:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[OP1]], i64 2) +// CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sme.fp8.scale.single.x2.nxv2f64( [[TMP0]], [[TMP1]], [[OP2:%.*]]) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP3]], i64 0) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP4]], [[TMP5]], i64 2) +// CHECK-NEXT: ret [[TMP6]] +// +// CPP-CHECK-LABEL: @_Z26test_svscale_single_f64_x213svfloat64x2_tu11__SVInt64_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[OP1:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[OP1]], i64 2) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sme.fp8.scale.single.x2.nxv2f64( [[TMP0]], [[TMP1]], [[OP2:%.*]]) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP3]], i64 0) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP4]], [[TMP5]], i64 2) +// CPP-CHECK-NEXT: ret [[TMP6]] +// +svfloat64x2_t test_svscale_single_f64_x2(svfloat64x2_t op1, svint64_t op2) __arm_streaming +{ + return SVE_ACLE_FUNC(svscale,_single_f64_x2)(op1, op2); +} + +// Single x4 +// CHECK-LABEL: @test_svscale_single_f16_x4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[OP1:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[OP1]], i64 8) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[OP1]], i64 16) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[OP1]], i64 24) +// CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sme.fp8.scale.single.x4.nxv8f16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[OP2:%.*]]) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP5]], i64 0) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 8) +// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 +// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP8]], [[TMP9]], i64 16) +// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 +// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP10]], [[TMP11]], i64 24) +// CHECK-NEXT: ret [[TMP12]] +// +// CPP-CHECK-LABEL: @_Z26test_svscale_single_f16_x413svfloat16x4_tu11__SVInt16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[OP1:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[OP1]], i64 8) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[OP1]], 
i64 16) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[OP1]], i64 24) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sme.fp8.scale.single.x4.nxv8f16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[OP2:%.*]]) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP5]], i64 0) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 8) +// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 +// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP8]], [[TMP9]], i64 16) +// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 +// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP10]], [[TMP11]], i64 24) +// CPP-CHECK-NEXT: ret [[TMP12]] +// +svfloat16x4_t test_svscale_single_f16_x4(svfloat16x4_t op1, svint16_t op2) __arm_streaming +{ + return SVE_ACLE_FUNC(svscale,_single_f16_x4)(op1, op2); +} + +// CHECK-LABEL: @test_svscale_single_f32_x4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[OP1:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[OP1]], i64 4) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[OP1]], i64 8) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[OP1]], i64 12) +// CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sme.fp8.scale.single.x4.nxv4f32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[OP2:%.*]]) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP5]], i64 0) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 4) +// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 +// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP8]], [[TMP9]], i64 8) +// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 +// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP10]], [[TMP11]], i64 12) +// CHECK-NEXT: ret [[TMP12]] +// +// CPP-CHECK-LABEL: @_Z26test_svscale_single_f32_x413svfloat32x4_tu11__SVInt32_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[OP1:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[OP1]], i64 4) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[OP1]], i64 8) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[OP1]], i64 12) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sme.fp8.scale.single.x4.nxv4f32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[OP2:%.*]]) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP5]], i64 0) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 4) +// 
CPP-CHECK-NEXT:    [[TMP9:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP4]], 2
+// CPP-CHECK-NEXT:    [[TMP10:%.*]] = tail call <vscale x 16 x float> @llvm.vector.insert.nxv16f32.nxv4f32(<vscale x 16 x float> [[TMP8]], <vscale x 4 x float> [[TMP9]], i64 8)
+// CPP-CHECK-NEXT:    [[TMP11:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP4]], 3
+// CPP-CHECK-NEXT:    [[TMP12:%.*]] = tail call <vscale x 16 x float> @llvm.vector.insert.nxv16f32.nxv4f32(<vscale x 16 x float> [[TMP10]], <vscale x 4 x float> [[TMP11]], i64 12)
+// CPP-CHECK-NEXT:    ret <vscale x 16 x float> [[TMP12]]
+//
+svfloat32x4_t test_svscale_single_f32_x4(svfloat32x4_t op1, svint32_t op2) __arm_streaming
+{
+  return SVE_ACLE_FUNC(svscale,_single_f32_x4)(op1, op2);
+}
+
+// CHECK-LABEL: @test_svscale_single_f64_x4(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv8f64(<vscale x 8 x double> [[OP1:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv8f64(<vscale x 8 x double> [[OP1]], i64 2)
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv8f64(<vscale x 8 x double> [[OP1]], i64 4)
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv8f64(<vscale x 8 x double> [[OP1]], i64 6)
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sme.fp8.scale.single.x4.nxv2f64(<vscale x 2 x double> [[TMP0]], <vscale x 2 x double> [[TMP1]], <vscale x 2 x double> [[TMP2]], <vscale x 2 x double> [[TMP3]], <vscale x 2 x i64> [[OP2:%.*]])
+// CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP4]], 0
+// CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 8 x double> @llvm.vector.insert.nxv8f64.nxv2f64(<vscale x 8 x double> poison, <vscale x 2 x double> [[TMP5]], i64 0)
+// CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP4]], 1
+// CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 8 x double> @llvm.vector.insert.nxv8f64.nxv2f64(<vscale x 8 x double> [[TMP6]], <vscale x 2 x double> [[TMP7]], i64 2)
+// CHECK-NEXT:    [[TMP9:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP4]], 2
+// CHECK-NEXT:    [[TMP10:%.*]] = tail call <vscale x 8 x double> @llvm.vector.insert.nxv8f64.nxv2f64(<vscale x 8 x double> [[TMP8]], <vscale x 2 x double> [[TMP9]], i64 4)
+// CHECK-NEXT:    [[TMP11:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP4]], 3
+// CHECK-NEXT:    [[TMP12:%.*]] = tail call <vscale x 8 x double> @llvm.vector.insert.nxv8f64.nxv2f64(<vscale x 8 x double> [[TMP10]], <vscale x 2 x double> [[TMP11]], i64 6)
+// CHECK-NEXT:    ret <vscale x 8 x double> [[TMP12]]
+//
+// CPP-CHECK-LABEL: @_Z26test_svscale_single_f64_x413svfloat64x4_tu11__SVInt64_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv8f64(<vscale x 8 x double> [[OP1:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv8f64(<vscale x 8 x double> [[OP1]], i64 2)
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv8f64(<vscale x 8 x double> [[OP1]], i64 4)
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv8f64(<vscale x 8 x double> [[OP1]], i64 6)
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sme.fp8.scale.single.x4.nxv2f64(<vscale x 2 x double> [[TMP0]], <vscale x 2 x double> [[TMP1]], <vscale x 2 x double> [[TMP2]], <vscale x 2 x double> [[TMP3]], <vscale x 2 x i64> [[OP2:%.*]])
+// CPP-CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP4]], 0
+// CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 8 x double> @llvm.vector.insert.nxv8f64.nxv2f64(<vscale x 8 x double> poison, <vscale x 2 x double> [[TMP5]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP4]], 1
+// CPP-CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 8 x double> @llvm.vector.insert.nxv8f64.nxv2f64(<vscale x 8 x double> [[TMP6]], <vscale x 2 x double> [[TMP7]], i64 2)
+// CPP-CHECK-NEXT:    [[TMP9:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP4]], 2
+// CPP-CHECK-NEXT:    [[TMP10:%.*]] = tail call <vscale x 8 x double> @llvm.vector.insert.nxv8f64.nxv2f64(<vscale x 8 x double> [[TMP8]], <vscale x 2 x double> [[TMP9]], i64 4)
+// CPP-CHECK-NEXT:    [[TMP11:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP4]], 3
+// CPP-CHECK-NEXT:    [[TMP12:%.*]] = tail call <vscale x 8 x double> @llvm.vector.insert.nxv8f64.nxv2f64(<vscale x 8 x double> [[TMP10]], <vscale x 2 x double> [[TMP11]], i64 6)
+// CPP-CHECK-NEXT:    ret <vscale x 8 x double> [[TMP12]]
+//
+svfloat64x4_t test_svscale_single_f64_x4(svfloat64x4_t op1, svint64_t op2) __arm_streaming
+{
+  return SVE_ACLE_FUNC(svscale,_single_f64_x4)(op1, op2);
+}
+
+// Multi x2
+// CHECK-LABEL: @test_svscale_f16_x2(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv16f16(<vscale x 16 x half> [[OP1:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv16f16(<vscale x 16 x half> [[OP1]], i64 8)
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[OP2:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[OP2]], i64 8)
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call { <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sme.fp8.scale.x2.nxv8f16(<vscale x 8 x half> [[TMP0]], <vscale x 8 x half> [[TMP1]], <vscale x 8 x i16> [[TMP2]], <vscale x 8 x i16> [[TMP3]])
+// CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half> } [[TMP4]], 0
+// CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 16 x half> @llvm.vector.insert.nxv16f16.nxv8f16(<vscale x 16 x half> poison, <vscale x 8 x half> [[TMP5]], i64 0)
+// CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half> } [[TMP4]], 1
+// CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 16 x half> @llvm.vector.insert.nxv16f16.nxv8f16(<vscale x 16 x half> [[TMP6]], <vscale x 8 x half> [[TMP7]], i64 8)
+// CHECK-NEXT:    ret <vscale x 16 x half> [[TMP8]]
+//
+// CPP-CHECK-LABEL: @_Z19test_svscale_f16_x213svfloat16x2_t11svint16x2_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv16f16(<vscale x 16 x half> [[OP1:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv16f16(<vscale x 16 x half> [[OP1]], i64 8)
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[OP2:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[OP2]], i64 8)
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call { <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sme.fp8.scale.x2.nxv8f16(<vscale x 8 x half> [[TMP0]], <vscale x 8 x half> [[TMP1]], <vscale x 8 x i16> [[TMP2]], <vscale x 8 x i16> [[TMP3]])
+// CPP-CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half> } [[TMP4]], 0
+// CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 16 x half> @llvm.vector.insert.nxv16f16.nxv8f16(<vscale x 16 x half> poison, <vscale x 8 x half> [[TMP5]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half> } [[TMP4]], 1
+// CPP-CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 16 x half> @llvm.vector.insert.nxv16f16.nxv8f16(<vscale x 16 x half> [[TMP6]], <vscale x 8 x half> [[TMP7]], i64 8)
+// CPP-CHECK-NEXT:    ret <vscale x 16 x half> [[TMP8]]
+//
+svfloat16x2_t test_svscale_f16_x2(svfloat16x2_t op1, svint16x2_t op2) __arm_streaming
+{
+  return SVE_ACLE_FUNC(svscale,_f16_x2)(op1, op2);
+}
+
+// CHECK-LABEL: @test_svscale_f32_x2(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv8f32(<vscale x 8 x float> [[OP1:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv8f32(<vscale x 8 x float> [[OP1]], i64 4)
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> [[OP2:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> [[OP2]], i64 4)
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sme.fp8.scale.x2.nxv4f32(<vscale x 4 x float> [[TMP0]], <vscale x 4 x float> [[TMP1]], <vscale x 4 x i32> [[TMP2]], <vscale x 4 x i32> [[TMP3]])
+// CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP4]], 0
+// CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 8 x float> @llvm.vector.insert.nxv8f32.nxv4f32(<vscale x 8 x float> poison, <vscale x 4 x float> [[TMP5]], i64 0)
+// CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP4]], 1
+// CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 8 x float> @llvm.vector.insert.nxv8f32.nxv4f32(<vscale x 8 x float> [[TMP6]], <vscale x 4 x float> [[TMP7]], i64 4)
+// CHECK-NEXT:    ret <vscale x 8 x float> [[TMP8]]
+//
+// CPP-CHECK-LABEL: @_Z19test_svscale_f32_x213svfloat32x2_t11svint32x2_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv8f32(<vscale x 8 x float> [[OP1:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv8f32(<vscale x 8 x float> [[OP1]], i64 4)
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> [[OP2:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> [[OP2]], i64 4)
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sme.fp8.scale.x2.nxv4f32(<vscale x 4 x float> [[TMP0]], <vscale x 4 x float> [[TMP1]], <vscale x 4 x i32> [[TMP2]], <vscale x 4 x i32> [[TMP3]])
+// CPP-CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP4]], 0
+// CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 8 x float> @llvm.vector.insert.nxv8f32.nxv4f32(<vscale x 8 x float> poison, <vscale x 4 x float> [[TMP5]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP4]], 1
+// CPP-CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 8 x float> @llvm.vector.insert.nxv8f32.nxv4f32(<vscale x 8 x float> [[TMP6]], <vscale x 4 x float> [[TMP7]], i64 4)
+// CPP-CHECK-NEXT:    ret <vscale x 8 x float> [[TMP8]]
+//
+svfloat32x2_t test_svscale_f32_x2(svfloat32x2_t op1, svint32x2_t op2) __arm_streaming
+{
+  return SVE_ACLE_FUNC(svscale,_f32_x2)(op1, op2);
+}
+
+// CHECK-LABEL: @test_svscale_f64_x2(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv4f64(<vscale x 4 x double> [[OP1:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv4f64(<vscale x 4 x double> [[OP1]], i64 2)
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv4i64(<vscale x 4 x i64> [[OP2:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv4i64(<vscale x 4 x i64> [[OP2]], i64 2)
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sme.fp8.scale.x2.nxv2f64(<vscale x 2 x double> [[TMP0]], <vscale x 2 x double> [[TMP1]], <vscale x 2 x i64> [[TMP2]], <vscale x 2 x i64> [[TMP3]])
+// CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } [[TMP4]], 0
+// CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 4 x double> @llvm.vector.insert.nxv4f64.nxv2f64(<vscale x 4 x double> poison, <vscale x 2 x double> [[TMP5]], i64 0)
+// CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } [[TMP4]], 1
+// CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 4 x double> @llvm.vector.insert.nxv4f64.nxv2f64(<vscale x 4 x double> [[TMP6]], <vscale x 2 x double> [[TMP7]], i64 2)
+// CHECK-NEXT:    ret <vscale x 4 x double> [[TMP8]]
+//
+// CPP-CHECK-LABEL: @_Z19test_svscale_f64_x213svfloat64x2_t11svint64x2_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv4f64(<vscale x 4 x double> [[OP1:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv4f64(<vscale x 4 x double> [[OP1]], i64 2)
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv4i64(<vscale x 4 x i64> [[OP2:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv4i64(<vscale x 4 x i64> [[OP2]], i64 2)
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sme.fp8.scale.x2.nxv2f64(<vscale x 2 x double> [[TMP0]], <vscale x 2 x double> [[TMP1]], <vscale x 2 x i64> [[TMP2]], <vscale x 2 x i64> [[TMP3]])
+// CPP-CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } [[TMP4]], 0
+// CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 4 x double> @llvm.vector.insert.nxv4f64.nxv2f64(<vscale x 4 x double> poison, <vscale x 2 x double> [[TMP5]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } [[TMP4]], 1
+// CPP-CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 4 x double> @llvm.vector.insert.nxv4f64.nxv2f64(<vscale x 4 x double> [[TMP6]], <vscale x 2 x double> [[TMP7]], i64 2)
+// CPP-CHECK-NEXT:    ret <vscale x 4 x double> [[TMP8]]
+//
+svfloat64x2_t test_svscale_f64_x2(svfloat64x2_t op1, svint64x2_t op2) __arm_streaming
+{
+  return SVE_ACLE_FUNC(svscale,_f64_x2)(op1, op2);
+}
+
+// Multi x4
+// CHECK-LABEL: @test_svscale_f16_x4(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv32f16(<vscale x 32 x half> [[OP1:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv32f16(<vscale x 32 x half> [[OP1]], i64 8)
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv32f16(<vscale x 32 x half> [[OP1]], i64 16)
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv32f16(<vscale x 32 x half> [[OP1]], i64 24)
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[OP2:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[OP2]], i64 8)
+// CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[OP2]], i64 16)
+// CHECK-NEXT:    [[TMP7:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[OP2]], i64 24)
+// CHECK-NEXT:    [[TMP8:%.*]] = tail call { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sme.fp8.scale.x4.nxv8f16(<vscale x 8 x half> [[TMP0]], <vscale x 8 x half> [[TMP1]], <vscale x 8 x half> [[TMP2]], <vscale x 8 x half> [[TMP3]], <vscale x 8 x i16> [[TMP4]], <vscale x 8 x i16> [[TMP5]], <vscale x 8 x i16> [[TMP6]], <vscale x 8 x i16> [[TMP7]])
+// CHECK-NEXT:    [[TMP9:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[TMP8]], 0
+// CHECK-NEXT:    [[TMP10:%.*]] = tail call <vscale x 32 x half> @llvm.vector.insert.nxv32f16.nxv8f16(<vscale x 32 x half> poison, <vscale x 8 x half> [[TMP9]], i64 0)
+// CHECK-NEXT:    [[TMP11:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[TMP8]], 1
+// CHECK-NEXT:    [[TMP12:%.*]] = tail call <vscale x 32 x half> @llvm.vector.insert.nxv32f16.nxv8f16(<vscale x 32 x half> [[TMP10]], <vscale x 8 x half> [[TMP11]], i64 8)
+// CHECK-NEXT:    [[TMP13:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[TMP8]], 2
+// CHECK-NEXT:    [[TMP14:%.*]] = tail call <vscale x 32 x half> @llvm.vector.insert.nxv32f16.nxv8f16(<vscale x 32 x half> [[TMP12]], <vscale x 8 x half> [[TMP13]], i64 16)
+// CHECK-NEXT:    [[TMP15:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[TMP8]], 3
+// CHECK-NEXT:    [[TMP16:%.*]] = tail call <vscale x 32 x half> @llvm.vector.insert.nxv32f16.nxv8f16(<vscale x 32 x half> [[TMP14]], <vscale x 8 x half> [[TMP15]], i64 24)
+// CHECK-NEXT:    ret <vscale x 32 x half> [[TMP16]]
+//
+// CPP-CHECK-LABEL: @_Z19test_svscale_f16_x413svfloat16x4_t11svint16x4_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv32f16(<vscale x 32 x half> [[OP1:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv32f16(<vscale x 32 x half> [[OP1]], i64 8)
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv32f16(<vscale x 32 x half> [[OP1]], i64 16)
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv32f16(<vscale x 32 x half> [[OP1]], i64 24)
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[OP2:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[OP2]], i64 8)
+// CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[OP2]], i64 16)
+// CPP-CHECK-NEXT:    [[TMP7:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[OP2]], i64 24)
+// CPP-CHECK-NEXT:    [[TMP8:%.*]] = tail call { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sme.fp8.scale.x4.nxv8f16(<vscale x 8 x half> [[TMP0]], <vscale x 8 x half> [[TMP1]], <vscale x 8 x half> [[TMP2]], <vscale x 8 x half> [[TMP3]], <vscale x 8 x i16> [[TMP4]], <vscale x 8 x i16> [[TMP5]], <vscale x 8 x i16> [[TMP6]], <vscale x 8 x i16> [[TMP7]])
+// CPP-CHECK-NEXT:    [[TMP9:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[TMP8]], 0
+// CPP-CHECK-NEXT:    [[TMP10:%.*]] = tail call <vscale x 32 x half> @llvm.vector.insert.nxv32f16.nxv8f16(<vscale x 32 x half> poison, <vscale x 8 x half> [[TMP9]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP11:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[TMP8]], 1
+// CPP-CHECK-NEXT:    [[TMP12:%.*]] = tail call <vscale x 32 x half> @llvm.vector.insert.nxv32f16.nxv8f16(<vscale x 32 x half> [[TMP10]], <vscale x 8 x half> [[TMP11]], i64 8)
+// CPP-CHECK-NEXT:    [[TMP13:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[TMP8]], 2
+// CPP-CHECK-NEXT:    [[TMP14:%.*]] = tail call <vscale x 32 x half> @llvm.vector.insert.nxv32f16.nxv8f16(<vscale x 32 x half> [[TMP12]], <vscale x 8 x half> [[TMP13]], i64 16)
+// CPP-CHECK-NEXT:    [[TMP15:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[TMP8]], 3
+// CPP-CHECK-NEXT:    [[TMP16:%.*]] = tail call <vscale x 32 x half> @llvm.vector.insert.nxv32f16.nxv8f16(<vscale x 32 x half> [[TMP14]], <vscale x 8 x half> [[TMP15]], i64 24)
+// CPP-CHECK-NEXT:    ret <vscale x 32 x half> [[TMP16]]
+//
+svfloat16x4_t test_svscale_f16_x4(svfloat16x4_t op1, svint16x4_t op2) __arm_streaming
+{
+  return SVE_ACLE_FUNC(svscale,_f16_x4)(op1, op2);
+}
+
+// CHECK-LABEL: @test_svscale_f32_x4(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv16f32(<vscale x 16 x float> [[OP1:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv16f32(<vscale x 16 x float> [[OP1]], i64 4)
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv16f32(<vscale x 16 x float> [[OP1]], i64 8)
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv16f32(<vscale x 16 x float> [[OP1]], i64 12)
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[OP2:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[OP2]], i64 4)
+// CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[OP2]], i64 8)
+// CHECK-NEXT:    [[TMP7:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[OP2]], i64 12)
+// CHECK-NEXT:    [[TMP8:%.*]] = tail call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sme.fp8.scale.x4.nxv4f32(<vscale x 4 x float> [[TMP0]], <vscale x 4 x float> [[TMP1]], <vscale x 4 x float> [[TMP2]], <vscale x 4 x float> [[TMP3]], <vscale x 4 x i32> [[TMP4]], <vscale x 4 x i32> [[TMP5]], <vscale x 4 x i32> [[TMP6]], <vscale x 4 x i32> [[TMP7]])
+// CHECK-NEXT:    [[TMP9:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP8]], 0
+// CHECK-NEXT:    [[TMP10:%.*]] = tail call <vscale x 16 x float> @llvm.vector.insert.nxv16f32.nxv4f32(<vscale x 16 x float> poison, <vscale x 4 x float> [[TMP9]], i64 0)
+// CHECK-NEXT:    [[TMP11:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP8]], 1
+// CHECK-NEXT:    [[TMP12:%.*]] = tail call <vscale x 16 x float> @llvm.vector.insert.nxv16f32.nxv4f32(<vscale x 16 x float> [[TMP10]], <vscale x 4 x float> [[TMP11]], i64 4)
+// CHECK-NEXT:    [[TMP13:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP8]], 2
+// CHECK-NEXT:    [[TMP14:%.*]] = tail call <vscale x 16 x float> @llvm.vector.insert.nxv16f32.nxv4f32(<vscale x 16 x float> [[TMP12]], <vscale x 4 x float> [[TMP13]], i64 8)
+// CHECK-NEXT:    [[TMP15:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP8]], 3
+// CHECK-NEXT:    [[TMP16:%.*]] = tail call <vscale x 16 x float> @llvm.vector.insert.nxv16f32.nxv4f32(<vscale x 16 x float> [[TMP14]], <vscale x 4 x float> [[TMP15]], i64 12)
+// CHECK-NEXT:    ret <vscale x 16 x float> [[TMP16]]
+//
+// CPP-CHECK-LABEL: @_Z19test_svscale_f32_x413svfloat32x4_t11svint32x4_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv16f32(<vscale x 16 x float> [[OP1:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv16f32(<vscale x 16 x float> [[OP1]], i64 4)
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv16f32(<vscale x 16 x float> [[OP1]], i64 8)
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv16f32(<vscale x 16 x float> [[OP1]], i64 12)
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[OP2:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[OP2]], i64 4)
+// CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[OP2]], i64 8)
+// CPP-CHECK-NEXT:    [[TMP7:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[OP2]], i64 12)
+// CPP-CHECK-NEXT:    [[TMP8:%.*]] = tail call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sme.fp8.scale.x4.nxv4f32(<vscale x 4 x float> [[TMP0]], <vscale x 4 x float> [[TMP1]], <vscale x 4 x float> [[TMP2]], <vscale x 4 x float> [[TMP3]], <vscale x 4 x i32> [[TMP4]], <vscale x 4 x i32> [[TMP5]], <vscale x 4 x i32> [[TMP6]], <vscale x 4 x i32> [[TMP7]])
+// CPP-CHECK-NEXT:    [[TMP9:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP8]], 0
+// CPP-CHECK-NEXT:    [[TMP10:%.*]] = tail call <vscale x 16 x float> @llvm.vector.insert.nxv16f32.nxv4f32(<vscale x 16 x float> poison, <vscale x 4 x float> [[TMP9]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP11:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP8]], 1
+// CPP-CHECK-NEXT:    [[TMP12:%.*]] = tail call <vscale x 16 x float> @llvm.vector.insert.nxv16f32.nxv4f32(<vscale x 16 x float> [[TMP10]], <vscale x 4 x float> [[TMP11]], i64 4)
+// CPP-CHECK-NEXT:    [[TMP13:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP8]], 2
+// CPP-CHECK-NEXT:    [[TMP14:%.*]] = tail call <vscale x 16 x float> @llvm.vector.insert.nxv16f32.nxv4f32(<vscale x 16 x float> [[TMP12]], <vscale x 4 x float> [[TMP13]], i64 8)
+// CPP-CHECK-NEXT:    [[TMP15:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP8]], 3
+// CPP-CHECK-NEXT:    [[TMP16:%.*]] = tail call <vscale x 16 x float> @llvm.vector.insert.nxv16f32.nxv4f32(<vscale x 16 x float> [[TMP14]], <vscale x 4 x float> [[TMP15]], i64 12)
+// CPP-CHECK-NEXT:    ret <vscale x 16 x float> [[TMP16]]
+//
+svfloat32x4_t test_svscale_f32_x4(svfloat32x4_t op1, svint32x4_t op2) __arm_streaming
+{
+  return SVE_ACLE_FUNC(svscale,_f32_x4)(op1, op2);
+}
+
+// CHECK-LABEL: @test_svscale_f64_x4(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv8f64(<vscale x 8 x double> [[OP1:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv8f64(<vscale x 8 x double> [[OP1]], i64 2)
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv8f64(<vscale x 8 x double> [[OP1]], i64 4)
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv8f64(<vscale x 8 x double> [[OP1]], i64 6)
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[OP2:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[OP2]], i64 2)
+// CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[OP2]], i64 4)
+// CHECK-NEXT:    [[TMP7:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[OP2]], i64 6)
+// CHECK-NEXT:    [[TMP8:%.*]] = tail call { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sme.fp8.scale.x4.nxv2f64(<vscale x 2 x double> [[TMP0]], <vscale x 2 x double> [[TMP1]], <vscale x 2 x double> [[TMP2]], <vscale x 2 x double> [[TMP3]], <vscale x 2 x i64> [[TMP4]], <vscale x 2 x i64> [[TMP5]], <vscale x 2 x i64> [[TMP6]], <vscale x 2 x i64> [[TMP7]])
+// CHECK-NEXT:    [[TMP9:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP8]], 0
+// CHECK-NEXT:    [[TMP10:%.*]] = tail call <vscale x 8 x double> @llvm.vector.insert.nxv8f64.nxv2f64(<vscale x 8 x double> poison, <vscale x 2 x double> [[TMP9]], i64 0)
+// CHECK-NEXT:    [[TMP11:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP8]], 1
+// CHECK-NEXT:    [[TMP12:%.*]] = tail call <vscale x 8 x double> @llvm.vector.insert.nxv8f64.nxv2f64(<vscale x 8 x double> [[TMP10]], <vscale x 2 x double> [[TMP11]], i64 2)
+// CHECK-NEXT:    [[TMP13:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP8]], 2
+// CHECK-NEXT:    [[TMP14:%.*]] = tail call <vscale x 8 x double> @llvm.vector.insert.nxv8f64.nxv2f64(<vscale x 8 x double> [[TMP12]], <vscale x 2 x double> [[TMP13]], i64 4)
+// CHECK-NEXT:    [[TMP15:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP8]], 3
+// CHECK-NEXT:    [[TMP16:%.*]] = tail call <vscale x 8 x double> @llvm.vector.insert.nxv8f64.nxv2f64(<vscale x 8 x double> [[TMP14]], <vscale x 2 x double> [[TMP15]], i64 6)
+// CHECK-NEXT:    ret <vscale x 8 x double> [[TMP16]]
+//
+// CPP-CHECK-LABEL: @_Z19test_svscale_f64_x413svfloat64x4_t11svint64x4_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv8f64(<vscale x 8 x double> [[OP1:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv8f64(<vscale x 8 x double> [[OP1]], i64 2)
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv8f64(<vscale x 8 x double> [[OP1]], i64 4)
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv8f64(<vscale x 8 x double> [[OP1]], i64 6)
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[OP2:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[OP2]], i64 2)
+// CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[OP2]], i64 4)
+// CPP-CHECK-NEXT:    [[TMP7:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[OP2]], i64 6)
+// CPP-CHECK-NEXT:    [[TMP8:%.*]] = tail call { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sme.fp8.scale.x4.nxv2f64(<vscale x 2 x double> [[TMP0]], <vscale x 2 x double> [[TMP1]], <vscale x 2 x double> [[TMP2]], <vscale x 2 x double> [[TMP3]], <vscale x 2 x i64> [[TMP4]], <vscale x 2 x i64> [[TMP5]], <vscale x 2 x i64> [[TMP6]], <vscale x 2 x i64> [[TMP7]])
+// CPP-CHECK-NEXT:    [[TMP9:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP8]], 0
+// CPP-CHECK-NEXT:    [[TMP10:%.*]] = tail call <vscale x 8 x double> @llvm.vector.insert.nxv8f64.nxv2f64(<vscale x 8 x double> poison, <vscale x 2 x double> [[TMP9]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP11:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP8]], 1
+// CPP-CHECK-NEXT:    [[TMP12:%.*]] = tail call <vscale x 8 x double> @llvm.vector.insert.nxv8f64.nxv2f64(<vscale x 8 x double> [[TMP10]], <vscale x 2 x double> [[TMP11]], i64 2)
+// CPP-CHECK-NEXT:    [[TMP13:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP8]], 2
+// CPP-CHECK-NEXT:    [[TMP14:%.*]] = tail call <vscale x 8 x double> @llvm.vector.insert.nxv8f64.nxv2f64(<vscale x 8 x double> [[TMP12]], <vscale x 2 x double> [[TMP13]], i64 4)
+// CPP-CHECK-NEXT:    [[TMP15:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP8]], 3
+// CPP-CHECK-NEXT:    [[TMP16:%.*]] = tail call <vscale x 8 x double> @llvm.vector.insert.nxv8f64.nxv2f64(<vscale x 8 x double> [[TMP14]], <vscale x 2 x double> [[TMP15]], i64 6)
+// CPP-CHECK-NEXT:    ret <vscale x 8 x double> [[TMP16]]
+//
+svfloat64x4_t test_svscale_f64_x4(svfloat64x4_t op1, svint64x4_t op2) __arm_streaming
+{
+  return SVE_ACLE_FUNC(svscale,_f64_x4)(op1, op2);
+}
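For reference, FSCALE multiplies each floating-point element by two raised to the power of the corresponding signed integer element — effectively a vector ldexp. A minimal scalar sketch of the per-lane semantics (standalone C++; the function name is illustrative, not part of the ACLE):

#include <cmath>
#include <cstdint>
#include <cstdio>

// Scalar model of one FSCALE lane: zd[i] = zn[i] * 2^zm[i].
static float fscale_lane_f32(float zn, int32_t zm) {
  return std::ldexp(zn, zm); // ldexp scales by a power of two exactly
}

int main() {
  // In the _single_ forms tested above, a single integer vector (op2)
  // supplies the exponents for every vector of the op1 tuple; this models
  // just one lane of that operation.
  std::printf("%g\n", fscale_lane_f32(1.5f, 3));  // 12
  std::printf("%g\n", fscale_lane_f32(8.0f, -2)); // 2
}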
diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td
index 8ffa2d0878e11..b2a2e11240186 100644
--- a/llvm/include/llvm/IR/IntrinsicsAArch64.td
+++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td
@@ -3762,6 +3762,31 @@ let TargetPrefix = "aarch64" in {
     : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>],
                             [llvm_i32_ty, llvm_nxv16i8_ty, llvm_i32_ty],
                             [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<2>>, IntrReadMem]>;
+
+  //
+  // Register scaling
+  //
+  def int_aarch64_sme_fp8_scale_single_x2
+    : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>],
+                            [LLVMMatchType<0>, LLVMMatchType<0>, LLVMVectorOfBitcastsToInt<0>],
+                            [IntrNoMem]>;
+
+  def int_aarch64_sme_fp8_scale_single_x4
+    : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>],
+                            [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>, LLVMVectorOfBitcastsToInt<0>],
+                            [IntrNoMem]>;
+
+  def int_aarch64_sme_fp8_scale_x2
+    : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>],
+                            [LLVMMatchType<0>, LLVMMatchType<0>,
+                             LLVMVectorOfBitcastsToInt<0>, LLVMVectorOfBitcastsToInt<0>],
+                            [IntrNoMem]>;
+
+  def int_aarch64_sme_fp8_scale_x4
+    : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>],
+                            [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>,
+                             LLVMVectorOfBitcastsToInt<0>, LLVMVectorOfBitcastsToInt<0>, LLVMVectorOfBitcastsToInt<0>, LLVMVectorOfBitcastsToInt<0>],
+                            [IntrNoMem]>;
 }
 
 // SVE2.1 - ZIPQ1, ZIPQ2, UZPQ1, UZPQ2
diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
index 69806c9c3fdbf..dfb6b08b1f73b 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -5640,6 +5640,34 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
                AArch64::SQDMULH_VG4_4Z4Z_S, AArch64::SQDMULH_VG4_4Z4Z_D}))
         SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
       return;
+    case Intrinsic::aarch64_sme_fp8_scale_single_x2:
+      if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
+              Node->getValueType(0),
+              {0, AArch64::FSCALE_2ZZ_H, AArch64::FSCALE_2ZZ_S,
+               AArch64::FSCALE_2ZZ_D}))
+        SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
+      return;
+    case Intrinsic::aarch64_sme_fp8_scale_single_x4:
+      if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
+              Node->getValueType(0),
+              {0, AArch64::FSCALE_4ZZ_H, AArch64::FSCALE_4ZZ_S,
+               AArch64::FSCALE_4ZZ_D}))
+        SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
+      return;
+    case Intrinsic::aarch64_sme_fp8_scale_x2:
+      if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
+              Node->getValueType(0),
+              {0, AArch64::FSCALE_2Z2Z_H, AArch64::FSCALE_2Z2Z_S,
+               AArch64::FSCALE_2Z2Z_D}))
+        SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
+      return;
+    case Intrinsic::aarch64_sme_fp8_scale_x4:
+      if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
+              Node->getValueType(0),
+              {0, AArch64::FSCALE_4Z4Z_H, AArch64::FSCALE_4Z4Z_S,
+               AArch64::FSCALE_4Z4Z_D}))
+        SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
+      return;
     case Intrinsic::aarch64_sve_whilege_x2:
       if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
               Node->getValueType(0),
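All four new cases above follow the same shape: SelectOpcodeFromVT maps the result's element type to one of the H/S/D FSCALE opcodes, with 0 acting as a "no match" sentinel. A standalone sketch of that lookup pattern (the enum and table values here are illustrative, not real AArch64 opcode numbers):

#include <array>
#include <cstdio>

enum class ElemKind { Other, F16, F32, F64 }; // mirrors the H/S/D table slots

// Returns 0 (sentinel) when no opcode exists for the element type, which is
// why the callers above guard SelectDestructiveMultiIntrinsic with an if.
static unsigned selectOpcode(ElemKind K, const std::array<unsigned, 4> &Table) {
  return Table[static_cast<unsigned>(K)];
}

int main() {
  // Stand-ins for FSCALE_2Z2Z_{H,S,D}; the real values come from tablegen.
  const std::array<unsigned, 4> FScaleX2 = {0, 101, 102, 103};
  if (unsigned Op = selectOpcode(ElemKind::F32, FScaleX2))
    std::printf("selected opcode %u\n", Op);
}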
diff --git a/llvm/test/CodeGen/AArch64/sme2-intrinsics-fscale.ll b/llvm/test/CodeGen/AArch64/sme2-intrinsics-fscale.ll
new file mode 100644
index 0000000000000..591fe8da6b79c
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sme2-intrinsics-fscale.ll
@@ -0,0 +1,186 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme2 -mattr=+fp8 -force-streaming -verify-machineinstrs < %s | FileCheck %s
+
+; FSCALE (Single, x2)
+
+define { <vscale x 8 x half>, <vscale x 8 x half> } @multi_vec_scale_single_x2_half(<vscale x 8 x half> %zdn1, <vscale x 8 x half> %zdn2, <vscale x 8 x i16> %zm) {
+; CHECK-LABEL: multi_vec_scale_single_x2_half:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    fscale { z0.h, z1.h }, { z0.h, z1.h }, z2.h
+; CHECK-NEXT:    ret
+  %res = call { <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sme.fp8.scale.single.x2.nxv8f16(<vscale x 8 x half> %zdn1, <vscale x 8 x half> %zdn2, <vscale x 8 x i16> %zm)
+  ret { <vscale x 8 x half>, <vscale x 8 x half> } %res
+}
+
+define { <vscale x 4 x float>, <vscale x 4 x float> } @multi_vec_scale_single_x2_float(<vscale x 4 x float> %zdn1, <vscale x 4 x float> %zdn2, <vscale x 4 x i32> %zm) {
+; CHECK-LABEL: multi_vec_scale_single_x2_float:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    fscale { z0.s, z1.s }, { z0.s, z1.s }, z2.s
+; CHECK-NEXT:    ret
+  %res = call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sme.fp8.scale.single.x2.nxv4f32(<vscale x 4 x float> %zdn1, <vscale x 4 x float> %zdn2, <vscale x 4 x i32> %zm)
+  ret { <vscale x 4 x float>, <vscale x 4 x float> } %res
+}
+
+define { <vscale x 2 x double>, <vscale x 2 x double> } @multi_vec_scale_single_x2_double(<vscale x 2 x double> %zdn1, <vscale x 2 x double> %zdn2, <vscale x 2 x i64> %zm) {
+; CHECK-LABEL: multi_vec_scale_single_x2_double:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    fscale { z0.d, z1.d }, { z0.d, z1.d }, z2.d
+; CHECK-NEXT:    ret
+  %res = call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sme.fp8.scale.single.x2.nxv2f64(<vscale x 2 x double> %zdn1, <vscale x 2 x double> %zdn2, <vscale x 2 x i64> %zm)
+  ret { <vscale x 2 x double>, <vscale x 2 x double> } %res
+}
+
+; FSCALE (Single, x4)
+
+define { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @multi_vec_scale_single_x4_half(<vscale x 8 x half> %zdn1, <vscale x 8 x half> %zdn2, <vscale x 8 x half> %zdn3, <vscale x 8 x half> %zdn4, <vscale x 8 x i16> %zm) {
+; CHECK-LABEL: multi_vec_scale_single_x4_half:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    fscale { z0.h - z3.h }, { z0.h - z3.h }, z4.h
+; CHECK-NEXT:    ret
+  %res = call { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sme.fp8.scale.single.x4.nxv8f16(<vscale x 8 x half> %zdn1, <vscale x 8 x half> %zdn2, <vscale x 8 x half> %zdn3, <vscale x 8 x half> %zdn4, <vscale x 8 x i16> %zm)
+  ret { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } %res
+}
+
+define { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @multi_vec_scale_single_x4_float(<vscale x 4 x float> %zdn1, <vscale x 4 x float> %zdn2, <vscale x 4 x float> %zdn3, <vscale x 4 x float> %zdn4, <vscale x 4 x i32> %zm) {
+; CHECK-LABEL: multi_vec_scale_single_x4_float:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    fscale { z0.s - z3.s }, { z0.s - z3.s }, z4.s
+; CHECK-NEXT:    ret
+  %res = call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sme.fp8.scale.single.x4.nxv4f32(<vscale x 4 x float> %zdn1, <vscale x 4 x float> %zdn2, <vscale x 4 x float> %zdn3, <vscale x 4 x float> %zdn4, <vscale x 4 x i32> %zm)
+  ret { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } %res
+}
+
+define { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @multi_vec_scale_single_x4_double(<vscale x 2 x double> %zdn1, <vscale x 2 x double> %zdn2, <vscale x 2 x double> %zdn3, <vscale x 2 x double> %zdn4, <vscale x 2 x i64> %zm) {
+; CHECK-LABEL: multi_vec_scale_single_x4_double:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    fscale { z0.d - z3.d }, { z0.d - z3.d }, z4.d
+; CHECK-NEXT:    ret
+  %res = call { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sme.fp8.scale.single.x4.nxv2f64(<vscale x 2 x double> %zdn1, <vscale x 2 x double> %zdn2, <vscale x 2 x double> %zdn3, <vscale x 2 x double> %zdn4, <vscale x 2 x i64> %zm)
+  ret { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } %res
+}
+
+; FSCALE (Multi, x2)
+define { <vscale x 8 x half>, <vscale x 8 x half> } @multi_vec_scale_x2_half(<vscale x 8 x half> %zdn1, <vscale x 8 x half> %zdn2, <vscale x 8 x i16> %zm1, <vscale x 8 x i16> %zm2) {
+; CHECK-LABEL: multi_vec_scale_x2_half:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3
+; CHECK-NEXT:    fscale { z0.h, z1.h }, { z0.h, z1.h }, { z2.h, z3.h }
+; CHECK-NEXT:    ret
+  %res = call { <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sme.fp8.scale.x2.nxv8f16(<vscale x 8 x half> %zdn1, <vscale x 8 x half> %zdn2, <vscale x 8 x i16> %zm1, <vscale x 8 x i16> %zm2)
+  ret { <vscale x 8 x half>, <vscale x 8 x half> } %res
+}
+
+define { <vscale x 4 x float>, <vscale x 4 x float> } @multi_vec_scale_x2_float(<vscale x 4 x float> %zdn1, <vscale x 4 x float> %zdn2, <vscale x 4 x i32> %zm1, <vscale x 4 x i32> %zm2) {
+; CHECK-LABEL: multi_vec_scale_x2_float:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3
+; CHECK-NEXT:    fscale { z0.s, z1.s }, { z0.s, z1.s }, { z2.s, z3.s }
+; CHECK-NEXT:    ret
+  %res = call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sme.fp8.scale.x2.nxv4f32(<vscale x 4 x float> %zdn1, <vscale x 4 x float> %zdn2, <vscale x 4 x i32> %zm1, <vscale x 4 x i32> %zm2)
+  ret { <vscale x 4 x float>, <vscale x 4 x float> } %res
+}
+
+define { <vscale x 2 x double>, <vscale x 2 x double> } @multi_vec_scale_x2_double(<vscale x 2 x double> %zdn1, <vscale x 2 x double> %zdn2, <vscale x 2 x i64> %zm1, <vscale x 2 x i64> %zm2) {
+; CHECK-LABEL: multi_vec_scale_x2_double:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3
+; CHECK-NEXT:    fscale { z0.d, z1.d }, { z0.d, z1.d }, { z2.d, z3.d }
+; CHECK-NEXT:    ret
+  %res = call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sme.fp8.scale.x2.nxv2f64(<vscale x 2 x double> %zdn1, <vscale x 2 x double> %zdn2, <vscale x 2 x i64> %zm1, <vscale x 2 x i64> %zm2)
+  ret { <vscale x 2 x double>, <vscale x 2 x double> } %res
+}
+
+; FSCALE (Multi, x4)
+define { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @multi_vec_scale_x4_half(<vscale x 8 x half> %zdn1, <vscale x 8 x half> %zdn2, <vscale x 8 x half> %zdn3, <vscale x 8 x half> %zdn4, <vscale x 8 x i16> %zm1, <vscale x 8 x i16> %zm2, <vscale x 8 x i16> %zm3, <vscale x 8 x i16> %zm4) {
+; CHECK-LABEL: multi_vec_scale_x4_half:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z7 killed $z7 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z6 killed $z6 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z5 killed $z5 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z4 killed $z4 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT:    fscale { z0.h - z3.h }, { z0.h - z3.h }, { z4.h - z7.h }
+; CHECK-NEXT:    ret
+  %res = call { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sme.fp8.scale.x4.nxv8f16(<vscale x 8 x half> %zdn1, <vscale x 8 x half> %zdn2, <vscale x 8 x half> %zdn3, <vscale x 8 x half> %zdn4, <vscale x 8 x i16> %zm1, <vscale x 8 x i16> %zm2, <vscale x 8 x i16> %zm3, <vscale x 8 x i16> %zm4)
+  ret { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } %res
+}
+
+define { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @multi_vec_scale_x4_float(<vscale x 4 x float> %zdn1, <vscale x 4 x float> %zdn2, <vscale x 4 x float> %zdn3, <vscale x 4 x float> %zdn4, <vscale x 4 x i32> %zm1, <vscale x 4 x i32> %zm2, <vscale x 4 x i32> %zm3, <vscale x 4 x i32> %zm4) {
+; CHECK-LABEL: multi_vec_scale_x4_float:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z7 killed $z7 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z6 killed $z6 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z5 killed $z5 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z4 killed $z4 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT:    fscale { z0.s - z3.s }, { z0.s - z3.s }, { z4.s - z7.s }
+; CHECK-NEXT:    ret
+  %res = call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sme.fp8.scale.x4.nxv4f32(<vscale x 4 x float> %zdn1, <vscale x 4 x float> %zdn2, <vscale x 4 x float> %zdn3, <vscale x 4 x float> %zdn4, <vscale x 4 x i32> %zm1, <vscale x 4 x i32> %zm2, <vscale x 4 x i32> %zm3, <vscale x 4 x i32> %zm4)
+  ret { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } %res
+}
+
+define { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @multi_vec_scale_x4_double(<vscale x 2 x double> %zdn1, <vscale x 2 x double> %zdn2, <vscale x 2 x double> %zdn3, <vscale x 2 x double> %zdn4, <vscale x 2 x i64> %zm1, <vscale x 2 x i64> %zm2, <vscale x 2 x i64> %zm3, <vscale x 2 x i64> %zm4) {
+; CHECK-LABEL: multi_vec_scale_x4_double:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z7 killed $z7 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z6 killed $z6 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z5 killed $z5 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z4 killed $z4 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT:    fscale { z0.d - z3.d }, { z0.d - z3.d }, { z4.d - z7.d }
+; CHECK-NEXT:    ret
+  %res = call { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sme.fp8.scale.x4.nxv2f64(<vscale x 2 x double> %zdn1, <vscale x 2 x double> %zdn2, <vscale x 2 x double> %zdn3, <vscale x 2 x double> %zdn4, <vscale x 2 x i64> %zm1, <vscale x 2 x i64> %zm2, <vscale x 2 x i64> %zm3, <vscale x 2 x i64> %zm4)
+  ret { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } %res
+}
+
+declare { <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sme.fp8.scale.single.x2.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x i16>)
+declare { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sme.fp8.scale.single.x2.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x i32>)
+declare { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sme.fp8.scale.single.x2.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x i64>)
+
+declare { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sme.fp8.scale.single.x4.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x i16>)
+declare { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sme.fp8.scale.single.x4.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x i32>)
+declare { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sme.fp8.scale.single.x4.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x i64>)
+
+declare { <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sme.fp8.scale.x2.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+declare { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sme.fp8.scale.x2.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+declare { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sme.fp8.scale.x2.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+
+declare { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sme.fp8.scale.x4.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+declare { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sme.fp8.scale.x4.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+declare { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sme.fp8.scale.x4.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>)

From 35ae7ee925e0c6eab962910885db3314c4961aa8 Mon Sep 17 00:00:00 2001
From: Mircea Trofin
Date: Wed, 25 Sep 2024 06:40:42 -0700
Subject: [PATCH 166/212] Remove spurious ; in ElimAvailExtern.cpp

Fix post #109203
---
 llvm/lib/Transforms/IPO/ElimAvailExtern.cpp | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/llvm/lib/Transforms/IPO/ElimAvailExtern.cpp b/llvm/lib/Transforms/IPO/ElimAvailExtern.cpp
index d3d27de4218c8..c997b180937af 100644
--- a/llvm/lib/Transforms/IPO/ElimAvailExtern.cpp
+++ b/llvm/lib/Transforms/IPO/ElimAvailExtern.cpp
@@ -135,7 +135,6 @@ EliminateAvailableExternallyPass::run(Module &M, ModuleAnalysisManager &MAM) {
 // for this contextual information. Eliding it in favor of the original would
 // undo these optimizations.
if (!eliminateAvailableExternally(M, /*Convert=*/(CtxProf && !!(*CtxProf)))) - ; - return PreservedAnalyses::all(); + return PreservedAnalyses::all(); return PreservedAnalyses::none(); } From 22829f757dc76b23071d9438ae9c6ddc3e966db0 Mon Sep 17 00:00:00 2001 From: Edd Dawson Date: Wed, 25 Sep 2024 14:43:45 +0100 Subject: [PATCH 167/212] [PS4,PS5][Driver] Fix typo in comment (NFC) (#109980) --- clang/lib/Driver/ToolChains/PS4CPU.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/lib/Driver/ToolChains/PS4CPU.cpp b/clang/lib/Driver/ToolChains/PS4CPU.cpp index 647580e4e235d..db77d058bcc59 100644 --- a/clang/lib/Driver/ToolChains/PS4CPU.cpp +++ b/clang/lib/Driver/ToolChains/PS4CPU.cpp @@ -338,7 +338,7 @@ toolchains::PS4PS5Base::PS4PS5Base(const Driver &D, const llvm::Triple &Triple, } // Allow --sysroot= to override the root directory for header and library - // search, and -sysroot to override header search. If both are specified, + // search, and -isysroot to override header search. If both are specified, // -isysroot overrides --sysroot for header search. auto OverrideRoot = [&](const options::ID &Opt, std::string &Root, StringRef Default) { From cd6f4cc6e646718e1bf61685186a95d2634e2b53 Mon Sep 17 00:00:00 2001 From: Jacek Caban Date: Wed, 25 Sep 2024 16:13:31 +0200 Subject: [PATCH 168/212] [LLD][COFF][NFC] Use CHPE version 2 in tests (#109872) --- lld/test/COFF/Inputs/loadconfig-arm64ec.s | 14 ++++------- lld/test/COFF/arm64ec-import.test | 29 +++++++++++------------ 2 files changed, 18 insertions(+), 25 deletions(-) diff --git a/lld/test/COFF/Inputs/loadconfig-arm64ec.s b/lld/test/COFF/Inputs/loadconfig-arm64ec.s index cb79b5c257e6e..80ec893869e6f 100644 --- a/lld/test/COFF/Inputs/loadconfig-arm64ec.s +++ b/lld/test/COFF/Inputs/loadconfig-arm64ec.s @@ -42,12 +42,6 @@ __os_arm64x_check_icall_cfg: .xword 0 __os_arm64x_dispatch_fptr: .xword 0 -__os_arm64x_helper0: - .xword 0 -__os_arm64x_helper1: - .xword 0 -__os_arm64x_helper2: - .xword 0 __os_arm64x_helper3: .xword 0 __os_arm64x_helper4: @@ -65,7 +59,7 @@ __os_arm64x_helper8: .globl __chpe_metadata .p2align 3, 0 __chpe_metadata: - .word 1 + .word 2 .rva __hybrid_code_map .word __hybrid_code_map_count .rva __x64_code_ranges_to_entry_points @@ -85,9 +79,9 @@ __chpe_metadata: .word __arm64x_extra_rfe_table_size .rva __os_arm64x_dispatch_fptr .rva __hybrid_auxiliary_iat_copy - .rva __os_arm64x_helper0 - .rva __os_arm64x_helper1 - .rva __os_arm64x_helper2 + .word 0 // __hybrid_auxiliary_delayload_iat + .word 0 // __hybrid_auxiliary_delayload_iat_copy + .word 0 // __hybrid_image_info_bitfield .rva __os_arm64x_helper3 .rva __os_arm64x_helper4 .rva __os_arm64x_helper5 diff --git a/lld/test/COFF/arm64ec-import.test b/lld/test/COFF/arm64ec-import.test index 08ff31ce1a8f3..9cf0914322941 100644 --- a/lld/test/COFF/arm64ec-import.test +++ b/lld/test/COFF/arm64ec-import.test @@ -27,7 +27,7 @@ RUN: llvm-readobj --coff-imports out2.dll | FileCheck --check-prefix=IMPORTS %s RUN: llvm-readobj --coff-imports out3.dll | FileCheck -check-prefix=IMPORTS %s IMPORTS: Import { IMPORTS-NEXT: Name: test.dll -IMPORTS-NEXT: ImportLookupTableRVA: 0x4230 +IMPORTS-NEXT: ImportLookupTableRVA: 0x4218 IMPORTS-NEXT: ImportAddressTableRVA: 0x3000 IMPORTS-NEXT: Symbol: data (0) IMPORTS-NEXT: Symbol: func (0) @@ -79,13 +79,13 @@ RUN: llvm-readobj --hex-dump=.test out2.dll | FileCheck --check-prefix=TESTSEC % TESTSEC: 0x180007000 08500000 00300000 10500000 20500000 TESTSEC-NEXT: 0x180007010 08300000 00500000 10300000 20300000 TESTSEC-NEXT: 0x180007020 
14100000 28100000 00200000 08100000 -TESTSEC-NEXT: 0x180007030 3c100000 a0420000 +TESTSEC-NEXT: 0x180007030 3c100000 88420000 RUN: llvm-readobj --hex-dump=.test out3.dll | FileCheck -check-prefix=TESTSEC-X64 %s TESTSEC-X64: 0x180007000 08300000 00300000 10300000 20300000 TESTSEC-X64-NEXT: 0x180007010 08300000 00500000 10300000 20300000 TESTSEC-X64-NEXT: 0x180007020 14100000 28100000 00200000 08100000 -TESTSEC-X64-NEXT: 0x180007030 3c100000 a0420000 +TESTSEC-X64-NEXT: 0x180007030 3c100000 88420000 RUN: FileCheck --check-prefix=MAP %s < out.map RUN: FileCheck --check-prefix=MAP %s < out2.map @@ -100,10 +100,10 @@ MAP-NEXT: 0002:00000000 __imp_data 0000000180003000 te MAP-NEXT: 0002:00000008 __imp_aux_func 0000000180003008 test{{.*}}:test.dll MAP-NEXT: 0002:00000010 __imp_aux_func2 0000000180003010 test{{.*}}:test.dll MAP-NEXT: 0002:00000020 __imp_aux_t2func 0000000180003020 test2{{.*}}:test2.dll -MAP: 0002:00001298 __auximpcopy_data 0000000180004298 test{{.*}}:test.dll -MAP-NEXT: 0002:000012a0 __auximpcopy_func 00000001800042a0 test{{.*}}:test.dll -MAP-NEXT: 0002:000012a8 __auximpcopy_func2 00000001800042a8 test{{.*}}:test.dll -MAP-NEXT: 0002:000012b8 __auximpcopy_t2func 00000001800042b8 test2{{.*}}:test2.dll +MAP: 0002:00001280 __auximpcopy_data 0000000180004280 test{{.*}}:test.dll +MAP-NEXT: 0002:00001288 __auximpcopy_func 0000000180004288 test{{.*}}:test.dll +MAP-NEXT: 0002:00001290 __auximpcopy_func2 0000000180004290 test{{.*}}:test.dll +MAP-NEXT: 0002:000012a0 __auximpcopy_t2func 00000001800042a0 test2{{.*}}:test2.dll MAP: 0002:00002000 __imp_aux_data 0000000180005000 test{{.*}}:test.dll MAP-NEXT: 0002:00002008 __imp_func 0000000180005008 test{{.*}}:test.dll MAP-NEXT: 0002:00002010 __imp_func2 0000000180005010 test{{.*}}:test.dll @@ -120,15 +120,14 @@ RUN: llvm-readobj --coff-load-config out.dll | FileCheck -check-prefix=LOADCONFI RUN: llvm-readobj --coff-load-config out2.dll | FileCheck -check-prefix=LOADCONFIG %s RUN: llvm-readobj --coff-load-config out3.dll | FileCheck -check-prefix=LOADCONFIG %s LOADCONFIG: AuxiliaryIAT: 0x5000 -LOADCONFIG: AuxiliaryIATCopy: 0x4298 +LOADCONFIG: AuxiliaryIATCopy: 0x4280 RUN: llvm-readobj --hex-dump=.rdata out.dll | FileCheck -check-prefix=RDATA %s RUN: llvm-readobj --hex-dump=.rdata out2.dll | FileCheck -check-prefix=RDATA %s RUN: llvm-readobj --hex-dump=.rdata out3.dll | FileCheck -check-prefix=RDATA %s -RDATA: 0x180004290 2e646c6c 00000000 00000000 00000000 -RDATA-NEXT: 0x1800042a0 14100080 01000000 28100080 01000000 -RDATA-NEXT: 0x1800042b0 00000000 00000000 48100080 01000000 -RDATA-NEXT: 0x1800042c0 00000000 00000000 00000000 00000000 +RDATA: 0x180004280 00000000 00000000 14100080 01000000 +RDATA-NEXT: 0x180004290 28100080 01000000 00000000 00000000 +RDATA-NEXT: 0x1800042a0 48100080 01000000 00000000 00000000 RDATA: 0x180005000 00000000 00000000 14100080 01000000 RDATA-NEXT: 0x180005010 28100080 01000000 00000000 00000000 RDATA-NEXT: 0x180005020 48100080 01000000 00000000 00000000 @@ -138,15 +137,15 @@ RUN: llvm-readobj --coff-basereloc out2.dll | FileCheck -check-prefix=BASERELOC RUN: llvm-readobj --coff-basereloc out3.dll | FileCheck -check-prefix=BASERELOC %s BASERELOC: BaseReloc [ Aux IAT copy: -BASERELOC: Address: 0x42A0 +BASERELOC: Address: 0x4288 BASERELOC-NEXT: } BASERELOC-NEXT: Entry { BASERELOC-NEXT: Type: DIR64 -BASERELOC-NEXT: Address: 0x42A8 +BASERELOC-NEXT: Address: 0x4290 BASERELOC-NEXT: } BASERELOC-NEXT: Entry { BASERELOC-NEXT: Type: DIR64 -BASERELOC-NEXT: Address: 0x42B8 +BASERELOC-NEXT: Address: 0x42A0 BASERELOC-NEXT: } Aux IAT: 
BASERELOC-NOT: Address: 0x5000

From 3477eb722fe094a6143108813ff017145aa9ef8a Mon Sep 17 00:00:00 2001
From: Chris Apple
Date: Wed, 25 Sep 2024 07:15:08 -0700
Subject: [PATCH 169/212] [rtsan][NFC] Move away from system include style for
 local headers (#109977)

---
 compiler-rt/lib/rtsan/rtsan.cpp                       | 10 +++++-----
 compiler-rt/lib/rtsan/rtsan_context.cpp               |  6 +++---
 compiler-rt/lib/rtsan/rtsan_context.h                 |  2 --
 compiler-rt/lib/rtsan/rtsan_preinit.cpp               |  2 +-
 compiler-rt/lib/rtsan/tests/rtsan_test_functional.cpp |  7 ++++---
 .../lib/rtsan/tests/rtsan_test_interceptors.cpp       |  4 ++--
 6 files changed, 15 insertions(+), 16 deletions(-)

diff --git a/compiler-rt/lib/rtsan/rtsan.cpp b/compiler-rt/lib/rtsan/rtsan.cpp
index 1e10069f51dd3..f02e89421035c 100644
--- a/compiler-rt/lib/rtsan/rtsan.cpp
+++ b/compiler-rt/lib/rtsan/rtsan.cpp
@@ -8,11 +8,11 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include <rtsan.h>
-#include <rtsan_assertions.h>
-#include <rtsan_diagnostics.h>
-#include <rtsan_flags.h>
-#include <rtsan_interceptors.h>
+#include "rtsan/rtsan.h"
+#include "rtsan/rtsan_assertions.h"
+#include "rtsan/rtsan_diagnostics.h"
+#include "rtsan/rtsan_flags.h"
+#include "rtsan/rtsan_interceptors.h"
 
 #include "sanitizer_common/sanitizer_atomic.h"
 #include "sanitizer_common/sanitizer_common.h"
diff --git a/compiler-rt/lib/rtsan/rtsan_context.cpp b/compiler-rt/lib/rtsan/rtsan_context.cpp
index 37ac817db76e4..1cf1791f0aaf8 100644
--- a/compiler-rt/lib/rtsan/rtsan_context.cpp
+++ b/compiler-rt/lib/rtsan/rtsan_context.cpp
@@ -8,10 +8,10 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include <rtsan_context.h>
-#include <rtsan.h>
+#include "rtsan/rtsan_context.h"
+#include "rtsan/rtsan.h"
 
-#include <sanitizer_common/sanitizer_allocator_internal.h>
+#include "sanitizer_common/sanitizer_allocator_internal.h"
 
 #include <new>
 #include <pthread.h>
diff --git a/compiler-rt/lib/rtsan/rtsan_context.h b/compiler-rt/lib/rtsan/rtsan_context.h
index 8512017793a48..cb0c2eb0a5e0d 100644
--- a/compiler-rt/lib/rtsan/rtsan_context.h
+++ b/compiler-rt/lib/rtsan/rtsan_context.h
@@ -10,8 +10,6 @@
 
 #pragma once
 
-#include <pthread.h>
-
 namespace __rtsan {
diff --git a/compiler-rt/lib/rtsan/rtsan_preinit.cpp b/compiler-rt/lib/rtsan/rtsan_preinit.cpp
index 1307268951fbc..5d49223bc8beb 100644
--- a/compiler-rt/lib/rtsan/rtsan_preinit.cpp
+++ b/compiler-rt/lib/rtsan/rtsan_preinit.cpp
@@ -8,8 +8,8 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "rtsan/rtsan.h"
 #include "sanitizer_common/sanitizer_internal_defs.h"
-#include <rtsan.h>
 
 #if SANITIZER_CAN_USE_PREINIT_ARRAY
diff --git a/compiler-rt/lib/rtsan/tests/rtsan_test_functional.cpp b/compiler-rt/lib/rtsan/tests/rtsan_test_functional.cpp
index dff3c527350fd..9e455f0326a54 100644
--- a/compiler-rt/lib/rtsan/tests/rtsan_test_functional.cpp
+++ b/compiler-rt/lib/rtsan/tests/rtsan_test_functional.cpp
@@ -15,9 +15,10 @@
 
 #include "gtest/gtest.h"
 #include "rtsan_test_utilities.h"
-#include <rtsan.h>
-#include <sanitizer_common/sanitizer_platform.h>
-#include <sanitizer_common/sanitizer_platform_interceptors.h>
+
+#include "rtsan/rtsan.h"
+#include "sanitizer_common/sanitizer_platform.h"
+#include "sanitizer_common/sanitizer_platform_interceptors.h"
 
 #include <atomic>
 #include <chrono>
diff --git a/compiler-rt/lib/rtsan/tests/rtsan_test_interceptors.cpp b/compiler-rt/lib/rtsan/tests/rtsan_test_interceptors.cpp
index e96d3758bcaf8..c65b1bb01fbe0 100644
--- a/compiler-rt/lib/rtsan/tests/rtsan_test_interceptors.cpp
+++ b/compiler-rt/lib/rtsan/tests/rtsan_test_interceptors.cpp
@@ -10,8 +10,8 @@
 
 #include "gtest/gtest.h"
 
-#include <sanitizer_common/sanitizer_platform.h>
-#include <sanitizer_common/sanitizer_platform_interceptors.h>
+#include "sanitizer_common/sanitizer_platform.h"
+#include "sanitizer_common/sanitizer_platform_interceptors.h"
 
 #include "rtsan_test_utilities.h"
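The next patch teaches the generic legalizer two classic unsigned-to-float expansions. A rough standalone C++ model of both (helper names are illustrative, not LLVM APIs; assumes IEEE-754 with round-to-nearest): the u64-to-f32 path reuses the signed conversion after halving with a sticky low bit, and the u64-to-f64 path assembles the result from 2^52- and 2^84-based bit patterns.

#include <cstdint>
#include <cstdio>
#include <cstring>

static double bits_to_double(uint64_t b) {
  double d;
  std::memcpy(&d, &b, sizeof d); // stand-in for llvm::bit_cast<double>
  return d;
}

// u64 -> f32: values below 2^63 convert directly via the signed path; larger
// values are halved with the shifted-out bit OR'ed back in (so rounding still
// sees it), converted, then doubled.
static float u64_to_f32(uint64_t x) {
  if (static_cast<int64_t>(x) >= 0)
    return static_cast<float>(static_cast<int64_t>(x));
  uint64_t halved = (x >> 1) | (x & 1); // keep a sticky bit for rounding
  float f = static_cast<float>(static_cast<int64_t>(halved));
  return f + f;
}

// u64 -> f64: build (2^52 + low32) and (2^84 + high32 * 2^32) as exact bit
// patterns; subtracting (2^84 + 2^52) cancels the implicit leading ones, and
// the final add performs the one required rounding.
static double u64_to_f64(uint64_t x) {
  double lo = bits_to_double(0x4330000000000000ULL | (x & 0xFFFFFFFFULL));
  double hi = bits_to_double(0x4530000000000000ULL | (x >> 32));
  return (hi - bits_to_double(0x4530000000100000ULL)) + lo; // 2^84 + 2^52
}

int main() {
  const uint64_t tests[] = {0, 1, 0x7FFFFFFFFFFFFFFFULL,
                            0x8000000000000001ULL, ~0ULL};
  for (uint64_t t : tests)
    std::printf("%llu -> %.9g / %.17g\n",
                static_cast<unsigned long long>(t), u64_to_f32(t),
                u64_to_f64(t));
}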
From e9cb44090ff7b3feda386ca1ee1252ab47c0617e Mon Sep 17 00:00:00 2001 From: Evgenii Kudriashov Date: Wed, 25 Sep 2024 17:15:36 +0300 Subject: [PATCH 170/212] [X86][GlobalISel] Enable scalar versions of G_UITOFP and G_FPTOUI (#100079) Also add tests for G_SITOFP and G_FPTOSI --- .../llvm/CodeGen/GlobalISel/LegalizerHelper.h | 2 + .../CodeGen/GlobalISel/LegalizerHelper.cpp | 86 +++- .../lib/Target/X86/GISel/X86LegalizerInfo.cpp | 99 +++++ llvm/lib/Target/X86/GISel/X86LegalizerInfo.h | 6 + .../Target/X86/GISel/X86RegisterBankInfo.cpp | 10 +- llvm/test/CodeGen/X86/isel-fp-to-int.ll | 391 +++++++++++++++++ llvm/test/CodeGen/X86/isel-int-to-fp.ll | 395 ++++++++++++++++++ 7 files changed, 979 insertions(+), 10 deletions(-) create mode 100644 llvm/test/CodeGen/X86/isel-fp-to-int.ll create mode 100644 llvm/test/CodeGen/X86/isel-int-to-fp.ll diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h index 5360850deeffd..ecade6b5caed6 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h @@ -394,6 +394,8 @@ class LegalizerHelper { LegalizeResult lowerRotate(MachineInstr &MI); LegalizeResult lowerU64ToF32BitOps(MachineInstr &MI); + LegalizeResult lowerU64ToF32WithSITOFP(MachineInstr &MI); + LegalizeResult lowerU64ToF64BitFloatOps(MachineInstr &MI); LegalizeResult lowerUITOFP(MachineInstr &MI); LegalizeResult lowerSITOFP(MachineInstr &MI); LegalizeResult lowerFPTOUI(MachineInstr &MI); diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp index e64d3f51a0111..c3b6b3033cf5c 100644 --- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -7169,6 +7169,78 @@ LegalizerHelper::lowerU64ToF32BitOps(MachineInstr &MI) { return Legalized; } +// Expand s32 = G_UITOFP s64 to an IEEE float representation using bit +// operations and G_SITOFP +LegalizerHelper::LegalizeResult +LegalizerHelper::lowerU64ToF32WithSITOFP(MachineInstr &MI) { + auto [Dst, Src] = MI.getFirst2Regs(); + const LLT S64 = LLT::scalar(64); + const LLT S32 = LLT::scalar(32); + const LLT S1 = LLT::scalar(1); + + assert(MRI.getType(Src) == S64 && MRI.getType(Dst) == S32); + + // For i64 < INT_MAX we simply reuse SITOFP. + // Otherwise, divide i64 by 2, round result by ORing with the lowest bit + // saved before division, convert to float by SITOFP, multiply the result + // by 2. + auto One = MIRBuilder.buildConstant(S64, 1); + auto Zero = MIRBuilder.buildConstant(S64, 0); + // Result if Src < INT_MAX + auto SmallResult = MIRBuilder.buildSITOFP(S32, Src); + // Result if Src >= INT_MAX + auto Halved = MIRBuilder.buildLShr(S64, Src, One); + auto LowerBit = MIRBuilder.buildAnd(S64, Src, One); + auto RoundedHalved = MIRBuilder.buildOr(S64, Halved, LowerBit); + auto HalvedFP = MIRBuilder.buildSITOFP(S32, RoundedHalved); + auto LargeResult = MIRBuilder.buildFAdd(S32, HalvedFP, HalvedFP); + // Check if the original value is larger than INT_MAX by comparing with + // zero to pick one of the two conversions. + auto IsLarge = + MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_SLT, S1, Src, Zero); + MIRBuilder.buildSelect(Dst, IsLarge, LargeResult, SmallResult); + + MI.eraseFromParent(); + return Legalized; +} + +// Expand s64 = G_UITOFP s64 using bit and float arithmetic operations to an +// IEEE double representation. 
+LegalizerHelper::LegalizeResult
+LegalizerHelper::lowerU64ToF64BitFloatOps(MachineInstr &MI) {
+  auto [Dst, Src] = MI.getFirst2Regs();
+  const LLT S64 = LLT::scalar(64);
+  const LLT S32 = LLT::scalar(32);
+
+  assert(MRI.getType(Src) == S64 && MRI.getType(Dst) == S64);
+
+  // We create double value from 32 bit parts with 32 exponent difference.
+  // Note that + and - are float operations that adjust the implicit leading
+  // one, the bases 2^52 and 2^84 are for illustrative purposes.
+  //
+  // X = 2^52 * 1.0...LowBits
+  // Y = 2^84 * 1.0...HighBits
+  // Scratch = 2^84 * 1.0...HighBits - 2^84 * 1.0 - 2^52 * 1.0
+  //         = - 2^52 * 1.0...HighBits
+  // Result = - 2^52 * 1.0...HighBits + 2^52 * 1.0...LowBits
+  auto TwoP52 = MIRBuilder.buildConstant(S64, UINT64_C(0x4330000000000000));
+  auto TwoP84 = MIRBuilder.buildConstant(S64, UINT64_C(0x4530000000000000));
+  auto TwoP52P84 = llvm::bit_cast<double>(UINT64_C(0x4530000000100000));
+  auto TwoP52P84FP = MIRBuilder.buildFConstant(S64, TwoP52P84);
+  auto HalfWidth = MIRBuilder.buildConstant(S64, 32);
+
+  auto LowBits = MIRBuilder.buildTrunc(S32, Src);
+  LowBits = MIRBuilder.buildZExt(S64, LowBits);
+  auto LowBitsFP = MIRBuilder.buildOr(S64, TwoP52, LowBits);
+  auto HighBits = MIRBuilder.buildLShr(S64, Src, HalfWidth);
+  auto HighBitsFP = MIRBuilder.buildOr(S64, TwoP84, HighBits);
+  auto Scratch = MIRBuilder.buildFSub(S64, HighBitsFP, TwoP52P84FP);
+  MIRBuilder.buildFAdd(Dst, Scratch, LowBitsFP);
+
+  MI.eraseFromParent();
+  return Legalized;
+}
+
 LegalizerHelper::LegalizeResult LegalizerHelper::lowerUITOFP(MachineInstr &MI) {
   auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
 
@@ -7183,13 +7255,15 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerUITOFP(MachineInstr &MI) {
   if (SrcTy != LLT::scalar(64))
     return UnableToLegalize;
 
-  if (DstTy == LLT::scalar(32)) {
+  if (DstTy == LLT::scalar(32))
     // TODO: SelectionDAG has several alternative expansions to port which may
-    // be more reasonble depending on the available instructions. If a target
-    // has sitofp, does not have CTLZ, or can efficiently use f64 as an
-    // intermediate type, this is probably worse.
-    return lowerU64ToF32BitOps(MI);
-  }
+    // be more reasonable depending on the available instructions. We also need
+    // a more advanced mechanism to choose an optimal version depending on
+    // target features such as sitofp or CTLZ availability.
+    return lowerU64ToF32WithSITOFP(MI);
+
+  if (DstTy == LLT::scalar(64))
+    return lowerU64ToF64BitFloatOps(MI);
 
   return UnableToLegalize;
 }
diff --git a/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp b/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp
index 7169d588548b0..bab7fe9d25e44 100644
--- a/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp
+++ b/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp
@@ -498,6 +498,62 @@ X86LegalizerInfo::X86LegalizerInfo(const X86Subtarget &STI,
       .clampScalar(0, s32, sMaxScalar)
       .widenScalarToNextPow2(1);
 
+  // For G_UITOFP and G_FPTOUI without AVX512, we have to custom legalize types
+  // <= s32 manually. Otherwise, in custom handler there is no way to
+  // understand whether s32 is an original type and we need to promote it to
+  // s64 or s32 is obtained after widening and we shouldn't widen it to s64.
+  //
+  // For AVX512 we simply widen types as there is direct mapping from opcodes
+  // to asm instructions.
+  getActionDefinitionsBuilder(G_UITOFP)
+      .legalIf([=](const LegalityQuery &Query) {
+        return HasAVX512 && typeInSet(0, {s32, s64})(Query) &&
+               typeInSet(1, {s32, s64})(Query);
+      })
+      .customIf([=](const LegalityQuery &Query) {
+        return !HasAVX512 &&
+               ((HasSSE1 && typeIs(0, s32)(Query)) ||
+                (HasSSE2 && typeIs(0, s64)(Query))) &&
+               scalarNarrowerThan(1, Is64Bit ? 64 : 32)(Query);
+      })
+      .lowerIf([=](const LegalityQuery &Query) {
+        // Lower conversions from s64
+        return !HasAVX512 &&
+               ((HasSSE1 && typeIs(0, s32)(Query)) ||
+                (HasSSE2 && typeIs(0, s64)(Query))) &&
+               (Is64Bit && typeIs(1, s64)(Query));
+      })
+      .clampScalar(0, s32, HasSSE2 ? s64 : s32)
+      .widenScalarToNextPow2(0)
+      .clampScalar(1, s32, sMaxScalar)
+      .widenScalarToNextPow2(1);
+
+  getActionDefinitionsBuilder(G_FPTOUI)
+      .legalIf([=](const LegalityQuery &Query) {
+        return HasAVX512 && typeInSet(0, {s32, s64})(Query) &&
+               typeInSet(1, {s32, s64})(Query);
+      })
+      .customIf([=](const LegalityQuery &Query) {
+        return !HasAVX512 &&
+               ((HasSSE1 && typeIs(1, s32)(Query)) ||
+                (HasSSE2 && typeIs(1, s64)(Query))) &&
+               scalarNarrowerThan(0, Is64Bit ? 64 : 32)(Query);
+      })
+      // TODO: replace with customized legalization using
+      // specifics of cvttsd2si. The selection of this node requires
+      // a vector type. Either G_SCALAR_TO_VECTOR is needed or more advanced
+      // support of G_BUILD_VECTOR/G_INSERT_VECTOR_ELT is required beforehand.
+      .lowerIf([=](const LegalityQuery &Query) {
+        return !HasAVX512 &&
+               ((HasSSE1 && typeIs(1, s32)(Query)) ||
+                (HasSSE2 && typeIs(1, s64)(Query))) &&
+               (Is64Bit && typeIs(0, s64)(Query));
+      })
+      .clampScalar(0, s32, sMaxScalar)
+      .widenScalarToNextPow2(0)
+      .clampScalar(1, s32, HasSSE2 ? s64 : s32)
+      .widenScalarToNextPow2(1);
+
   // vector ops
   getActionDefinitionsBuilder(G_BUILD_VECTOR)
       .customIf([=](const LegalityQuery &Query) {
@@ -590,6 +646,10 @@ bool X86LegalizerInfo::legalizeCustom(LegalizerHelper &Helper, MachineInstr &MI,
     return false;
   case TargetOpcode::G_BUILD_VECTOR:
     return legalizeBuildVector(MI, MRI, Helper);
+  case TargetOpcode::G_FPTOUI:
+    return legalizeFPTOUI(MI, MRI, Helper);
+  case TargetOpcode::G_UITOFP:
+    return legalizeUITOFP(MI, MRI, Helper);
   }
   llvm_unreachable("expected switch to return");
 }
@@ -645,6 +705,45 @@ bool X86LegalizerInfo::legalizeBuildVector(MachineInstr &MI,
   return true;
 }
 
+bool X86LegalizerInfo::legalizeFPTOUI(MachineInstr &MI,
+                                      MachineRegisterInfo &MRI,
+                                      LegalizerHelper &Helper) const {
+  MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
+  auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
+  unsigned DstSizeInBits = DstTy.getScalarSizeInBits();
+  const LLT s32 = LLT::scalar(32);
+  const LLT s64 = LLT::scalar(64);
+
+  // Simply reuse FPTOSI when it is possible to widen the type
+  if (DstSizeInBits <= 32) {
+    auto Casted = MIRBuilder.buildFPTOSI(DstTy == s32 ? s64 : s32, Src);
+    MIRBuilder.buildTrunc(Dst, Casted);
+    MI.eraseFromParent();
+    return true;
+  }
+
+  return false;
+}
+
+bool X86LegalizerInfo::legalizeUITOFP(MachineInstr &MI,
+                                      MachineRegisterInfo &MRI,
+                                      LegalizerHelper &Helper) const {
+  MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
+  auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
+  const LLT s32 = LLT::scalar(32);
+  const LLT s64 = LLT::scalar(64);
+
+  // Simply reuse SITOFP when it is possible to widen the type
+  if (SrcTy.getSizeInBits() <= 32) {
+    auto Ext = MIRBuilder.buildZExt(SrcTy == s32 ?
+ getActionDefinitionsBuilder(G_UITOFP) + .legalIf([=](const LegalityQuery &Query) { + return HasAVX512 && typeInSet(0, {s32, s64})(Query) && + typeInSet(1, {s32, s64})(Query); + }) + .customIf([=](const LegalityQuery &Query) { + return !HasAVX512 && + ((HasSSE1 && typeIs(0, s32)(Query)) || + (HasSSE2 && typeIs(0, s64)(Query))) && + scalarNarrowerThan(1, Is64Bit ? 64 : 32)(Query); + }) + .lowerIf([=](const LegalityQuery &Query) { + // Lower conversions from s64 + return !HasAVX512 && + ((HasSSE1 && typeIs(0, s32)(Query)) || + (HasSSE2 && typeIs(0, s64)(Query))) && + (Is64Bit && typeIs(1, s64)(Query)); + }) + .clampScalar(0, s32, HasSSE2 ? s64 : s32) + .widenScalarToNextPow2(0) + .clampScalar(1, s32, sMaxScalar) + .widenScalarToNextPow2(1); + + getActionDefinitionsBuilder(G_FPTOUI) + .legalIf([=](const LegalityQuery &Query) { + return HasAVX512 && typeInSet(0, {s32, s64})(Query) && + typeInSet(1, {s32, s64})(Query); + }) + .customIf([=](const LegalityQuery &Query) { + return !HasAVX512 && + ((HasSSE1 && typeIs(1, s32)(Query)) || + (HasSSE2 && typeIs(1, s64)(Query))) && + scalarNarrowerThan(0, Is64Bit ? 64 : 32)(Query); + }) + // TODO: replace with customized legalization using + // specifics of cvttsd2si. The selection of this node requires + // a vector type. Either G_SCALAR_TO_VECTOR is needed or more advanced + // support of G_BUILD_VECTOR/G_INSERT_VECTOR_ELT is required beforehand. + .lowerIf([=](const LegalityQuery &Query) { + return !HasAVX512 && + ((HasSSE1 && typeIs(1, s32)(Query)) || + (HasSSE2 && typeIs(1, s64)(Query))) && + (Is64Bit && typeIs(0, s64)(Query)); + }) + .clampScalar(0, s32, sMaxScalar) + .widenScalarToNextPow2(0) + .clampScalar(1, s32, HasSSE2 ? s64 : s32) + .widenScalarToNextPow2(1); + // vector ops getActionDefinitionsBuilder(G_BUILD_VECTOR) .customIf([=](const LegalityQuery &Query) { @@ -590,6 +646,10 @@ bool X86LegalizerInfo::legalizeCustom(LegalizerHelper &Helper, MachineInstr &MI, return false; case TargetOpcode::G_BUILD_VECTOR: return legalizeBuildVector(MI, MRI, Helper); + case TargetOpcode::G_FPTOUI: + return legalizeFPTOUI(MI, MRI, Helper); + case TargetOpcode::G_UITOFP: + return legalizeUITOFP(MI, MRI, Helper); } llvm_unreachable("expected switch to return"); } @@ -645,6 +705,45 @@ bool X86LegalizerInfo::legalizeBuildVector(MachineInstr &MI, return true; } +bool X86LegalizerInfo::legalizeFPTOUI(MachineInstr &MI, + MachineRegisterInfo &MRI, + LegalizerHelper &Helper) const { + MachineIRBuilder &MIRBuilder = Helper.MIRBuilder; + auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs(); + unsigned DstSizeInBits = DstTy.getScalarSizeInBits(); + const LLT s32 = LLT::scalar(32); + const LLT s64 = LLT::scalar(64); + + // Simply reuse FPTOSI when it is possible to widen the type + if (DstSizeInBits <= 32) { + auto Casted = MIRBuilder.buildFPTOSI(DstTy == s32 ? s64 : s32, Src); + MIRBuilder.buildTrunc(Dst, Casted); + MI.eraseFromParent(); + return true; + } + + return false; +} + +bool X86LegalizerInfo::legalizeUITOFP(MachineInstr &MI, + MachineRegisterInfo &MRI, + LegalizerHelper &Helper) const { + MachineIRBuilder &MIRBuilder = Helper.MIRBuilder; + auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs(); + const LLT s32 = LLT::scalar(32); + const LLT s64 = LLT::scalar(64); + + // Simply reuse SITOFP when it is possible to widen the type + if (SrcTy.getSizeInBits() <= 32) { + auto Ext = MIRBuilder.buildZExt(SrcTy == s32 ? 
s64 : s32, Src); + MIRBuilder.buildSITOFP(Dst, Ext); + MI.eraseFromParent(); + return true; + } + + return false; +} + bool X86LegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper, MachineInstr &MI) const { return true; diff --git a/llvm/lib/Target/X86/GISel/X86LegalizerInfo.h b/llvm/lib/Target/X86/GISel/X86LegalizerInfo.h index 229a58986903d..39bd9892e2f16 100644 --- a/llvm/lib/Target/X86/GISel/X86LegalizerInfo.h +++ b/llvm/lib/Target/X86/GISel/X86LegalizerInfo.h @@ -39,6 +39,12 @@ class X86LegalizerInfo : public LegalizerInfo { private: bool legalizeBuildVector(MachineInstr &MI, MachineRegisterInfo &MRI, LegalizerHelper &Helper) const; + + bool legalizeFPTOUI(MachineInstr &MI, MachineRegisterInfo &MRI, + LegalizerHelper &Helper) const; + + bool legalizeUITOFP(MachineInstr &MI, MachineRegisterInfo &MRI, + LegalizerHelper &Helper) const; }; } // namespace llvm #endif diff --git a/llvm/lib/Target/X86/GISel/X86RegisterBankInfo.cpp b/llvm/lib/Target/X86/GISel/X86RegisterBankInfo.cpp index 61633a09d93cf..43c0145ec8e2a 100644 --- a/llvm/lib/Target/X86/GISel/X86RegisterBankInfo.cpp +++ b/llvm/lib/Target/X86/GISel/X86RegisterBankInfo.cpp @@ -296,7 +296,9 @@ X86RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { getInstrPartialMappingIdxs(MI, MRI, /* isFP= */ true, OpRegBankIdx); break; case TargetOpcode::G_SITOFP: - case TargetOpcode::G_FPTOSI: { + case TargetOpcode::G_FPTOSI: + case TargetOpcode::G_UITOFP: + case TargetOpcode::G_FPTOUI: { // Some of the floating-point instructions have mixed GPR and FP // operands: fine-tune the computed mapping. auto &Op0 = MI.getOperand(0); @@ -304,10 +306,10 @@ X86RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { const LLT Ty0 = MRI.getType(Op0.getReg()); const LLT Ty1 = MRI.getType(Op1.getReg()); - bool FirstArgIsFP = Opc == TargetOpcode::G_SITOFP; - bool SecondArgIsFP = Opc == TargetOpcode::G_FPTOSI; + bool FirstArgIsFP = + Opc == TargetOpcode::G_SITOFP || Opc == TargetOpcode::G_UITOFP; OpRegBankIdx[0] = getPartialMappingIdx(MI, Ty0, /* isFP= */ FirstArgIsFP); - OpRegBankIdx[1] = getPartialMappingIdx(MI, Ty1, /* isFP= */ SecondArgIsFP); + OpRegBankIdx[1] = getPartialMappingIdx(MI, Ty1, /* isFP= */ !FirstArgIsFP); break; } case TargetOpcode::G_FCMP: { diff --git a/llvm/test/CodeGen/X86/isel-fp-to-int.ll b/llvm/test/CodeGen/X86/isel-fp-to-int.ll new file mode 100644 index 0000000000000..fae3db6ad0afa --- /dev/null +++ b/llvm/test/CodeGen/X86/isel-fp-to-int.ll @@ -0,0 +1,391 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc < %s -mtriple=x86_64-- -verify-machineinstrs | FileCheck %s --check-prefixes X64,SDAG-X64 +; RUN: llc < %s -global-isel -global-isel-abort=1 -mtriple=x86_64-- -verify-machineinstrs | FileCheck %s --check-prefixes X64,GISEL-X64 +; RUN: llc < %s -mattr=+avx512f -mtriple=x86_64-- -verify-machineinstrs | FileCheck %s --check-prefixes AVX512,SDAG-AVX512 +; RUN: llc < %s -global-isel -global-isel-abort=1 -mattr=+avx512f -mtriple=x86_64-- -verify-machineinstrs | FileCheck %s --check-prefixes AVX512,GISEL-AVX512 + +define i64 @test_double_to_ui64(double %x) { +; SDAG-X64-LABEL: test_double_to_ui64: +; SDAG-X64: # %bb.0: # %entry +; SDAG-X64-NEXT: cvttsd2si %xmm0, %rcx +; SDAG-X64-NEXT: movq %rcx, %rdx +; SDAG-X64-NEXT: sarq $63, %rdx +; SDAG-X64-NEXT: subsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SDAG-X64-NEXT: cvttsd2si %xmm0, %rax +; SDAG-X64-NEXT: andq %rdx, %rax +; SDAG-X64-NEXT: orq %rcx, %rax +; SDAG-X64-NEXT: retq +; +; GISEL-X64-LABEL: 
test_double_to_ui64: +; GISEL-X64: # %bb.0: # %entry +; GISEL-X64-NEXT: cvttsd2si %xmm0, %rcx +; GISEL-X64-NEXT: movsd {{.*#+}} xmm1 = [9.2233720368547758E+18,0.0E+0] +; GISEL-X64-NEXT: movapd %xmm0, %xmm2 +; GISEL-X64-NEXT: subsd %xmm1, %xmm2 +; GISEL-X64-NEXT: cvttsd2si %xmm2, %rdx +; GISEL-X64-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000 +; GISEL-X64-NEXT: xorq %rdx, %rax +; GISEL-X64-NEXT: xorl %edx, %edx +; GISEL-X64-NEXT: ucomisd %xmm1, %xmm0 +; GISEL-X64-NEXT: setb %dl +; GISEL-X64-NEXT: andl $1, %edx +; GISEL-X64-NEXT: cmovneq %rcx, %rax +; GISEL-X64-NEXT: retq +; +; AVX512-LABEL: test_double_to_ui64: +; AVX512: # %bb.0: # %entry +; AVX512-NEXT: vcvttsd2usi %xmm0, %rax +; AVX512-NEXT: retq +entry: + %conv = fptoui double %x to i64 + ret i64 %conv +} + +define i32 @test_double_to_ui32(double %x) { +; X64-LABEL: test_double_to_ui32: +; X64: # %bb.0: # %entry +; X64-NEXT: cvttsd2si %xmm0, %rax +; X64-NEXT: # kill: def $eax killed $eax killed $rax +; X64-NEXT: retq +; +; AVX512-LABEL: test_double_to_ui32: +; AVX512: # %bb.0: # %entry +; AVX512-NEXT: vcvttsd2usi %xmm0, %eax +; AVX512-NEXT: retq +entry: + %conv = fptoui double %x to i32 + ret i32 %conv +} + +define zeroext i16 @test_double_to_ui16(double %x) { +; X64-LABEL: test_double_to_ui16: +; X64: # %bb.0: # %entry +; X64-NEXT: cvttsd2si %xmm0, %eax +; X64-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NEXT: retq +; +; SDAG-AVX512-LABEL: test_double_to_ui16: +; SDAG-AVX512: # %bb.0: # %entry +; SDAG-AVX512-NEXT: vcvttsd2si %xmm0, %eax +; SDAG-AVX512-NEXT: # kill: def $ax killed $ax killed $eax +; SDAG-AVX512-NEXT: retq +; +; GISEL-AVX512-LABEL: test_double_to_ui16: +; GISEL-AVX512: # %bb.0: # %entry +; GISEL-AVX512-NEXT: vcvttsd2usi %xmm0, %eax +; GISEL-AVX512-NEXT: # kill: def $ax killed $ax killed $eax +; GISEL-AVX512-NEXT: retq +entry: + %conv = fptoui double %x to i16 + ret i16 %conv +} + +define zeroext i8 @test_double_to_ui8(double %x) { +; X64-LABEL: test_double_to_ui8: +; X64: # %bb.0: # %entry +; X64-NEXT: cvttsd2si %xmm0, %eax +; X64-NEXT: # kill: def $al killed $al killed $eax +; X64-NEXT: retq +; +; SDAG-AVX512-LABEL: test_double_to_ui8: +; SDAG-AVX512: # %bb.0: # %entry +; SDAG-AVX512-NEXT: vcvttsd2si %xmm0, %eax +; SDAG-AVX512-NEXT: # kill: def $al killed $al killed $eax +; SDAG-AVX512-NEXT: retq +; +; GISEL-AVX512-LABEL: test_double_to_ui8: +; GISEL-AVX512: # %bb.0: # %entry +; GISEL-AVX512-NEXT: vcvttsd2usi %xmm0, %eax +; GISEL-AVX512-NEXT: # kill: def $al killed $al killed $eax +; GISEL-AVX512-NEXT: retq +entry: + %conv = fptoui double %x to i8 + ret i8 %conv +} + +define i64 @test_float_to_ui64(float %x) { +; SDAG-X64-LABEL: test_float_to_ui64: +; SDAG-X64: # %bb.0: # %entry +; SDAG-X64-NEXT: cvttss2si %xmm0, %rcx +; SDAG-X64-NEXT: movq %rcx, %rdx +; SDAG-X64-NEXT: sarq $63, %rdx +; SDAG-X64-NEXT: subss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SDAG-X64-NEXT: cvttss2si %xmm0, %rax +; SDAG-X64-NEXT: andq %rdx, %rax +; SDAG-X64-NEXT: orq %rcx, %rax +; SDAG-X64-NEXT: retq +; +; GISEL-X64-LABEL: test_float_to_ui64: +; GISEL-X64: # %bb.0: # %entry +; GISEL-X64-NEXT: cvttss2si %xmm0, %rcx +; GISEL-X64-NEXT: movss {{.*#+}} xmm1 = [9.22337203E+18,0.0E+0,0.0E+0,0.0E+0] +; GISEL-X64-NEXT: movaps %xmm0, %xmm2 +; GISEL-X64-NEXT: subss %xmm1, %xmm2 +; GISEL-X64-NEXT: cvttss2si %xmm2, %rdx +; GISEL-X64-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000 +; GISEL-X64-NEXT: xorq %rdx, %rax +; GISEL-X64-NEXT: xorl %edx, %edx +; GISEL-X64-NEXT: ucomiss %xmm1, %xmm0 +; GISEL-X64-NEXT: setb 
%dl +; GISEL-X64-NEXT: andl $1, %edx +; GISEL-X64-NEXT: cmovneq %rcx, %rax +; GISEL-X64-NEXT: retq +; +; AVX512-LABEL: test_float_to_ui64: +; AVX512: # %bb.0: # %entry +; AVX512-NEXT: vcvttss2usi %xmm0, %rax +; AVX512-NEXT: retq +entry: + %conv = fptoui float %x to i64 + ret i64 %conv +} + +define i32 @test_float_to_ui32(float %x) { +; X64-LABEL: test_float_to_ui32: +; X64: # %bb.0: # %entry +; X64-NEXT: cvttss2si %xmm0, %rax +; X64-NEXT: # kill: def $eax killed $eax killed $rax +; X64-NEXT: retq +; +; AVX512-LABEL: test_float_to_ui32: +; AVX512: # %bb.0: # %entry +; AVX512-NEXT: vcvttss2usi %xmm0, %eax +; AVX512-NEXT: retq +entry: + %conv = fptoui float %x to i32 + ret i32 %conv +} + +define zeroext i16 @test_float_to_ui16(float %x) { +; X64-LABEL: test_float_to_ui16: +; X64: # %bb.0: # %entry +; X64-NEXT: cvttss2si %xmm0, %eax +; X64-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NEXT: retq +; +; SDAG-AVX512-LABEL: test_float_to_ui16: +; SDAG-AVX512: # %bb.0: # %entry +; SDAG-AVX512-NEXT: vcvttss2si %xmm0, %eax +; SDAG-AVX512-NEXT: # kill: def $ax killed $ax killed $eax +; SDAG-AVX512-NEXT: retq +; +; GISEL-AVX512-LABEL: test_float_to_ui16: +; GISEL-AVX512: # %bb.0: # %entry +; GISEL-AVX512-NEXT: vcvttss2usi %xmm0, %eax +; GISEL-AVX512-NEXT: # kill: def $ax killed $ax killed $eax +; GISEL-AVX512-NEXT: retq +entry: + %conv = fptoui float %x to i16 + ret i16 %conv +} + +define zeroext i8 @test_float_to_ui8(float %x) { +; X64-LABEL: test_float_to_ui8: +; X64: # %bb.0: # %entry +; X64-NEXT: cvttss2si %xmm0, %eax +; X64-NEXT: # kill: def $al killed $al killed $eax +; X64-NEXT: retq +; +; SDAG-AVX512-LABEL: test_float_to_ui8: +; SDAG-AVX512: # %bb.0: # %entry +; SDAG-AVX512-NEXT: vcvttss2si %xmm0, %eax +; SDAG-AVX512-NEXT: # kill: def $al killed $al killed $eax +; SDAG-AVX512-NEXT: retq +; +; GISEL-AVX512-LABEL: test_float_to_ui8: +; GISEL-AVX512: # %bb.0: # %entry +; GISEL-AVX512-NEXT: vcvttss2usi %xmm0, %eax +; GISEL-AVX512-NEXT: # kill: def $al killed $al killed $eax +; GISEL-AVX512-NEXT: retq +entry: + %conv = fptoui float %x to i8 + ret i8 %conv +} + +define i64 @test_double_to_si64(double %x) { +; X64-LABEL: test_double_to_si64: +; X64: # %bb.0: # %entry +; X64-NEXT: cvttsd2si %xmm0, %rax +; X64-NEXT: retq +; +; AVX512-LABEL: test_double_to_si64: +; AVX512: # %bb.0: # %entry +; AVX512-NEXT: vcvttsd2si %xmm0, %rax +; AVX512-NEXT: retq +entry: + %conv = fptosi double %x to i64 + ret i64 %conv +} + +define i32 @test_double_to_si32(double %x) { +; X64-LABEL: test_double_to_si32: +; X64: # %bb.0: # %entry +; X64-NEXT: cvttsd2si %xmm0, %eax +; X64-NEXT: retq +; +; AVX512-LABEL: test_double_to_si32: +; AVX512: # %bb.0: # %entry +; AVX512-NEXT: vcvttsd2si %xmm0, %eax +; AVX512-NEXT: retq +entry: + %conv = fptosi double %x to i32 + ret i32 %conv +} + +define signext i16 @test_double_to_si16(double %x) { +; X64-LABEL: test_double_to_si16: +; X64: # %bb.0: # %entry +; X64-NEXT: cvttsd2si %xmm0, %eax +; X64-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NEXT: retq +; +; AVX512-LABEL: test_double_to_si16: +; AVX512: # %bb.0: # %entry +; AVX512-NEXT: vcvttsd2si %xmm0, %eax +; AVX512-NEXT: # kill: def $ax killed $ax killed $eax +; AVX512-NEXT: retq +entry: + %conv = fptosi double %x to i16 + ret i16 %conv +} + +define signext i8 @test_double_to_si8(double %x) { +; X64-LABEL: test_double_to_si8: +; X64: # %bb.0: # %entry +; X64-NEXT: cvttsd2si %xmm0, %eax +; X64-NEXT: # kill: def $al killed $al killed $eax +; X64-NEXT: retq +; +; AVX512-LABEL: test_double_to_si8: +; AVX512: # %bb.0: # %entry 
+; AVX512-NEXT: vcvttsd2si %xmm0, %eax +; AVX512-NEXT: # kill: def $al killed $al killed $eax +; AVX512-NEXT: retq +entry: + %conv = fptosi double %x to i8 + ret i8 %conv +} + +define i31 @test_double_to_si31(double %x) { +; X64-LABEL: test_double_to_si31: +; X64: # %bb.0: # %entry +; X64-NEXT: cvttsd2si %xmm0, %eax +; X64-NEXT: retq +; +; AVX512-LABEL: test_double_to_si31: +; AVX512: # %bb.0: # %entry +; AVX512-NEXT: vcvttsd2si %xmm0, %eax +; AVX512-NEXT: retq +entry: + %conv = fptosi double %x to i31 + ret i31 %conv +} + +define i33 @test_double_to_si33(double %x) { +; X64-LABEL: test_double_to_si33: +; X64: # %bb.0: # %entry +; X64-NEXT: cvttsd2si %xmm0, %rax +; X64-NEXT: retq +; +; AVX512-LABEL: test_double_to_si33: +; AVX512: # %bb.0: # %entry +; AVX512-NEXT: vcvttsd2si %xmm0, %rax +; AVX512-NEXT: retq +entry: + %conv = fptosi double %x to i33 + ret i33 %conv +} + +define i64 @test_float_to_si64(float %x) { +; X64-LABEL: test_float_to_si64: +; X64: # %bb.0: # %entry +; X64-NEXT: cvttss2si %xmm0, %rax +; X64-NEXT: retq +; +; AVX512-LABEL: test_float_to_si64: +; AVX512: # %bb.0: # %entry +; AVX512-NEXT: vcvttss2si %xmm0, %rax +; AVX512-NEXT: retq +entry: + %conv = fptosi float %x to i64 + ret i64 %conv +} + +define i32 @test_float_to_si32(float %x) { +; X64-LABEL: test_float_to_si32: +; X64: # %bb.0: # %entry +; X64-NEXT: cvttss2si %xmm0, %eax +; X64-NEXT: retq +; +; AVX512-LABEL: test_float_to_si32: +; AVX512: # %bb.0: # %entry +; AVX512-NEXT: vcvttss2si %xmm0, %eax +; AVX512-NEXT: retq +entry: + %conv = fptosi float %x to i32 + ret i32 %conv +} + +define signext i16 @test_float_to_si16(float %x) { +; X64-LABEL: test_float_to_si16: +; X64: # %bb.0: # %entry +; X64-NEXT: cvttss2si %xmm0, %eax +; X64-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NEXT: retq +; +; AVX512-LABEL: test_float_to_si16: +; AVX512: # %bb.0: # %entry +; AVX512-NEXT: vcvttss2si %xmm0, %eax +; AVX512-NEXT: # kill: def $ax killed $ax killed $eax +; AVX512-NEXT: retq +entry: + %conv = fptosi float %x to i16 + ret i16 %conv +} + +define signext i8 @test_float_to_si8(float %x) { +; X64-LABEL: test_float_to_si8: +; X64: # %bb.0: # %entry +; X64-NEXT: cvttss2si %xmm0, %eax +; X64-NEXT: # kill: def $al killed $al killed $eax +; X64-NEXT: retq +; +; AVX512-LABEL: test_float_to_si8: +; AVX512: # %bb.0: # %entry +; AVX512-NEXT: vcvttss2si %xmm0, %eax +; AVX512-NEXT: # kill: def $al killed $al killed $eax +; AVX512-NEXT: retq +entry: + %conv = fptosi float %x to i8 + ret i8 %conv +} + +define i31 @test_float_to_si31(float %x) { +; X64-LABEL: test_float_to_si31: +; X64: # %bb.0: # %entry +; X64-NEXT: cvttss2si %xmm0, %eax +; X64-NEXT: retq +; +; AVX512-LABEL: test_float_to_si31: +; AVX512: # %bb.0: # %entry +; AVX512-NEXT: vcvttss2si %xmm0, %eax +; AVX512-NEXT: retq +entry: + %conv = fptosi float %x to i31 + ret i31 %conv +} + +define i33 @test_float_to_si33(float %x) { +; X64-LABEL: test_float_to_si33: +; X64: # %bb.0: # %entry +; X64-NEXT: cvttss2si %xmm0, %rax +; X64-NEXT: retq +; +; AVX512-LABEL: test_float_to_si33: +; AVX512: # %bb.0: # %entry +; AVX512-NEXT: vcvttss2si %xmm0, %rax +; AVX512-NEXT: retq +entry: + %conv = fptosi float %x to i33 + ret i33 %conv +} diff --git a/llvm/test/CodeGen/X86/isel-int-to-fp.ll b/llvm/test/CodeGen/X86/isel-int-to-fp.ll new file mode 100644 index 0000000000000..fc99ff95788f3 --- /dev/null +++ b/llvm/test/CodeGen/X86/isel-int-to-fp.ll @@ -0,0 +1,395 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc < %s 
-mtriple=x86_64-- -verify-machineinstrs | FileCheck %s --check-prefixes X64,SDAG-X64 +; RUN: llc < %s -global-isel -global-isel-abort=1 -mtriple=x86_64-- -verify-machineinstrs | FileCheck %s --check-prefixes X64,GISEL-X64 +; RUN: llc < %s -mattr=+avx512f -mtriple=x86_64-- -verify-machineinstrs | FileCheck %s --check-prefixes AVX512,SDAG-AVX512 +; RUN: llc < %s -global-isel -global-isel-abort=1 -mattr=+avx512f -mtriple=x86_64-- -verify-machineinstrs | FileCheck %s --check-prefixes AVX512,GISEL-AVX512 + +define double @test_ui64_to_double(i64 %x) { +; SDAG-X64-LABEL: test_ui64_to_double: +; SDAG-X64: # %bb.0: # %entry +; SDAG-X64-NEXT: movq %rdi, %xmm1 +; SDAG-X64-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] +; SDAG-X64-NEXT: subpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 +; SDAG-X64-NEXT: movapd %xmm1, %xmm0 +; SDAG-X64-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] +; SDAG-X64-NEXT: addsd %xmm1, %xmm0 +; SDAG-X64-NEXT: retq +; +; GISEL-X64-LABEL: test_ui64_to_double: +; GISEL-X64: # %bb.0: # %entry +; GISEL-X64-NEXT: movabsq $4841369599423283200, %rax # imm = 0x4330000000000000 +; GISEL-X64-NEXT: movabsq $4985484787499139072, %rcx # imm = 0x4530000000000000 +; GISEL-X64-NEXT: movsd {{.*#+}} xmm0 = [1.9342813118337666E+25,0.0E+0] +; GISEL-X64-NEXT: movl $4294967295, %edx # imm = 0xFFFFFFFF +; GISEL-X64-NEXT: andq %rdi, %rdx +; GISEL-X64-NEXT: orq %rax, %rdx +; GISEL-X64-NEXT: shrq $32, %rdi +; GISEL-X64-NEXT: orq %rdi, %rcx +; GISEL-X64-NEXT: movq %rcx, %xmm1 +; GISEL-X64-NEXT: subsd %xmm0, %xmm1 +; GISEL-X64-NEXT: movq %rdx, %xmm0 +; GISEL-X64-NEXT: addsd %xmm1, %xmm0 +; GISEL-X64-NEXT: retq +; +; AVX512-LABEL: test_ui64_to_double: +; AVX512: # %bb.0: # %entry +; AVX512-NEXT: vcvtusi2sd %rdi, %xmm0, %xmm0 +; AVX512-NEXT: retq +entry: + %conv = uitofp i64 %x to double + ret double %conv +} + +define double @test_ui32_to_double(i32 %x) { +; X64-LABEL: test_ui32_to_double: +; X64: # %bb.0: # %entry +; X64-NEXT: movl %edi, %eax +; X64-NEXT: cvtsi2sd %rax, %xmm0 +; X64-NEXT: retq +; +; AVX512-LABEL: test_ui32_to_double: +; AVX512: # %bb.0: # %entry +; AVX512-NEXT: vcvtusi2sd %edi, %xmm0, %xmm0 +; AVX512-NEXT: retq +entry: + %conv = uitofp i32 %x to double + ret double %conv +} + +define double @test_ui16_to_double(i16 zeroext %x) { +; X64-LABEL: test_ui16_to_double: +; X64: # %bb.0: # %entry +; X64-NEXT: cvtsi2sd %edi, %xmm0 +; X64-NEXT: retq +; +; SDAG-AVX512-LABEL: test_ui16_to_double: +; SDAG-AVX512: # %bb.0: # %entry +; SDAG-AVX512-NEXT: vcvtsi2sd %edi, %xmm0, %xmm0 +; SDAG-AVX512-NEXT: retq +; +; GISEL-AVX512-LABEL: test_ui16_to_double: +; GISEL-AVX512: # %bb.0: # %entry +; GISEL-AVX512-NEXT: vcvtusi2sd %edi, %xmm0, %xmm0 +; GISEL-AVX512-NEXT: retq +entry: + %conv = uitofp i16 %x to double + ret double %conv +} + +define double @test_ui8_to_double(i8 zeroext %x) { +; X64-LABEL: test_ui8_to_double: +; X64: # %bb.0: # %entry +; X64-NEXT: cvtsi2sd %edi, %xmm0 +; X64-NEXT: retq +; +; SDAG-AVX512-LABEL: test_ui8_to_double: +; SDAG-AVX512: # %bb.0: # %entry +; SDAG-AVX512-NEXT: vcvtsi2sd %edi, %xmm0, %xmm0 +; SDAG-AVX512-NEXT: retq +; +; GISEL-AVX512-LABEL: test_ui8_to_double: +; GISEL-AVX512: # %bb.0: # %entry +; GISEL-AVX512-NEXT: vcvtusi2sd %edi, %xmm0, %xmm0 +; GISEL-AVX512-NEXT: retq +entry: + %conv = uitofp i8 %x to double + ret double %conv +} + +define float @test_ui64_to_float(i64 %x) { +; SDAG-X64-LABEL: test_ui64_to_float: +; SDAG-X64: # %bb.0: # %entry +; SDAG-X64-NEXT: testq %rdi, %rdi +; SDAG-X64-NEXT: js .LBB4_1 +; SDAG-X64-NEXT: # %bb.2: # %entry +; SDAG-X64-NEXT: 
cvtsi2ss %rdi, %xmm0 +; SDAG-X64-NEXT: retq +; SDAG-X64-NEXT: .LBB4_1: +; SDAG-X64-NEXT: movq %rdi, %rax +; SDAG-X64-NEXT: shrq %rax +; SDAG-X64-NEXT: andl $1, %edi +; SDAG-X64-NEXT: orq %rax, %rdi +; SDAG-X64-NEXT: cvtsi2ss %rdi, %xmm0 +; SDAG-X64-NEXT: addss %xmm0, %xmm0 +; SDAG-X64-NEXT: retq +; +; GISEL-X64-LABEL: test_ui64_to_float: +; GISEL-X64: # %bb.0: # %entry +; GISEL-X64-NEXT: cvtsi2ss %rdi, %xmm0 +; GISEL-X64-NEXT: movq %rdi, %rax +; GISEL-X64-NEXT: shrq %rax +; GISEL-X64-NEXT: movq %rdi, %rcx +; GISEL-X64-NEXT: andq $1, %rcx +; GISEL-X64-NEXT: orq %rax, %rcx +; GISEL-X64-NEXT: cvtsi2ss %rcx, %xmm1 +; GISEL-X64-NEXT: addss %xmm1, %xmm1 +; GISEL-X64-NEXT: xorl %eax, %eax +; GISEL-X64-NEXT: cmpq $0, %rdi +; GISEL-X64-NEXT: setl %al +; GISEL-X64-NEXT: andl $1, %eax +; GISEL-X64-NEXT: movd %xmm1, %eax +; GISEL-X64-NEXT: movd %xmm0, %ecx +; GISEL-X64-NEXT: cmovnel %eax, %ecx +; GISEL-X64-NEXT: movd %ecx, %xmm0 +; GISEL-X64-NEXT: retq +; +; AVX512-LABEL: test_ui64_to_float: +; AVX512: # %bb.0: # %entry +; AVX512-NEXT: vcvtusi2ss %rdi, %xmm0, %xmm0 +; AVX512-NEXT: retq +entry: + %conv = uitofp i64 %x to float + ret float %conv +} + +define float @test_ui32_to_float(i32 %x) { +; X64-LABEL: test_ui32_to_float: +; X64: # %bb.0: # %entry +; X64-NEXT: movl %edi, %eax +; X64-NEXT: cvtsi2ss %rax, %xmm0 +; X64-NEXT: retq +; +; AVX512-LABEL: test_ui32_to_float: +; AVX512: # %bb.0: # %entry +; AVX512-NEXT: vcvtusi2ss %edi, %xmm0, %xmm0 +; AVX512-NEXT: retq +entry: + %conv = uitofp i32 %x to float + ret float %conv +} + +define float @test_ui16_to_float(i16 zeroext %x) { +; X64-LABEL: test_ui16_to_float: +; X64: # %bb.0: # %entry +; X64-NEXT: cvtsi2ss %edi, %xmm0 +; X64-NEXT: retq +; +; SDAG-AVX512-LABEL: test_ui16_to_float: +; SDAG-AVX512: # %bb.0: # %entry +; SDAG-AVX512-NEXT: vcvtsi2ss %edi, %xmm0, %xmm0 +; SDAG-AVX512-NEXT: retq +; +; GISEL-AVX512-LABEL: test_ui16_to_float: +; GISEL-AVX512: # %bb.0: # %entry +; GISEL-AVX512-NEXT: vcvtusi2ss %edi, %xmm0, %xmm0 +; GISEL-AVX512-NEXT: retq +entry: + %conv = uitofp i16 %x to float + ret float %conv +} + +define float @test_ui8_to_float(i8 zeroext %x) { +; X64-LABEL: test_ui8_to_float: +; X64: # %bb.0: # %entry +; X64-NEXT: cvtsi2ss %edi, %xmm0 +; X64-NEXT: retq +; +; SDAG-AVX512-LABEL: test_ui8_to_float: +; SDAG-AVX512: # %bb.0: # %entry +; SDAG-AVX512-NEXT: vcvtsi2ss %edi, %xmm0, %xmm0 +; SDAG-AVX512-NEXT: retq +; +; GISEL-AVX512-LABEL: test_ui8_to_float: +; GISEL-AVX512: # %bb.0: # %entry +; GISEL-AVX512-NEXT: vcvtusi2ss %edi, %xmm0, %xmm0 +; GISEL-AVX512-NEXT: retq +entry: + %conv = uitofp i8 %x to float + ret float %conv +} + +define double @test_si64_to_double(i64 %x) { +; X64-LABEL: test_si64_to_double: +; X64: # %bb.0: # %entry +; X64-NEXT: cvtsi2sd %rdi, %xmm0 +; X64-NEXT: retq +; +; AVX512-LABEL: test_si64_to_double: +; AVX512: # %bb.0: # %entry +; AVX512-NEXT: vcvtsi2sd %rdi, %xmm0, %xmm0 +; AVX512-NEXT: retq +entry: + %conv = sitofp i64 %x to double + ret double %conv +} + +define double @test_si32_to_double(i32 %x) { +; X64-LABEL: test_si32_to_double: +; X64: # %bb.0: # %entry +; X64-NEXT: cvtsi2sd %edi, %xmm0 +; X64-NEXT: retq +; +; AVX512-LABEL: test_si32_to_double: +; AVX512: # %bb.0: # %entry +; AVX512-NEXT: vcvtsi2sd %edi, %xmm0, %xmm0 +; AVX512-NEXT: retq +entry: + %conv = sitofp i32 %x to double + ret double %conv +} + +define double @test_si16_to_double(i16 signext %x) { +; X64-LABEL: test_si16_to_double: +; X64: # %bb.0: # %entry +; X64-NEXT: cvtsi2sd %edi, %xmm0 +; X64-NEXT: retq +; +; AVX512-LABEL: test_si16_to_double: +; 
AVX512: # %bb.0: # %entry +; AVX512-NEXT: vcvtsi2sd %edi, %xmm0, %xmm0 +; AVX512-NEXT: retq +entry: + %conv = sitofp i16 %x to double + ret double %conv +} + +define double @test_si8_to_double(i8 signext %x) { +; X64-LABEL: test_si8_to_double: +; X64: # %bb.0: # %entry +; X64-NEXT: cvtsi2sd %edi, %xmm0 +; X64-NEXT: retq +; +; AVX512-LABEL: test_si8_to_double: +; AVX512: # %bb.0: # %entry +; AVX512-NEXT: vcvtsi2sd %edi, %xmm0, %xmm0 +; AVX512-NEXT: retq +entry: + %conv = sitofp i8 %x to double + ret double %conv +} + +define double @test_si31_to_double(i31 %x) { +; X64-LABEL: test_si31_to_double: +; X64: # %bb.0: # %entry +; X64-NEXT: addl %edi, %edi +; X64-NEXT: sarl %edi +; X64-NEXT: cvtsi2sd %edi, %xmm0 +; X64-NEXT: retq +; +; AVX512-LABEL: test_si31_to_double: +; AVX512: # %bb.0: # %entry +; AVX512-NEXT: addl %edi, %edi +; AVX512-NEXT: sarl %edi +; AVX512-NEXT: vcvtsi2sd %edi, %xmm0, %xmm0 +; AVX512-NEXT: retq +entry: + %conv = sitofp i31 %x to double + ret double %conv +} + +define double @test_si33_to_double(i33 %x) { +; X64-LABEL: test_si33_to_double: +; X64: # %bb.0: # %entry +; X64-NEXT: shlq $31, %rdi +; X64-NEXT: sarq $31, %rdi +; X64-NEXT: cvtsi2sd %rdi, %xmm0 +; X64-NEXT: retq +; +; AVX512-LABEL: test_si33_to_double: +; AVX512: # %bb.0: # %entry +; AVX512-NEXT: shlq $31, %rdi +; AVX512-NEXT: sarq $31, %rdi +; AVX512-NEXT: vcvtsi2sd %rdi, %xmm0, %xmm0 +; AVX512-NEXT: retq +entry: + %conv = sitofp i33 %x to double + ret double %conv +} + +define float @test_si64_to_float(i64 %x) { +; X64-LABEL: test_si64_to_float: +; X64: # %bb.0: # %entry +; X64-NEXT: cvtsi2ss %rdi, %xmm0 +; X64-NEXT: retq +; +; AVX512-LABEL: test_si64_to_float: +; AVX512: # %bb.0: # %entry +; AVX512-NEXT: vcvtsi2ss %rdi, %xmm0, %xmm0 +; AVX512-NEXT: retq +entry: + %conv = sitofp i64 %x to float + ret float %conv +} + +define float @test_si32_to_float(i32 %x) { +; X64-LABEL: test_si32_to_float: +; X64: # %bb.0: # %entry +; X64-NEXT: cvtsi2ss %edi, %xmm0 +; X64-NEXT: retq +; +; AVX512-LABEL: test_si32_to_float: +; AVX512: # %bb.0: # %entry +; AVX512-NEXT: vcvtsi2ss %edi, %xmm0, %xmm0 +; AVX512-NEXT: retq +entry: + %conv = sitofp i32 %x to float + ret float %conv +} + +define float @test_si16_to_float(i16 signext %x) { +; X64-LABEL: test_si16_to_float: +; X64: # %bb.0: # %entry +; X64-NEXT: cvtsi2ss %edi, %xmm0 +; X64-NEXT: retq +; +; AVX512-LABEL: test_si16_to_float: +; AVX512: # %bb.0: # %entry +; AVX512-NEXT: vcvtsi2ss %edi, %xmm0, %xmm0 +; AVX512-NEXT: retq +entry: + %conv = sitofp i16 %x to float + ret float %conv +} + +define float @test_si8_to_float(i8 signext %x) { +; X64-LABEL: test_si8_to_float: +; X64: # %bb.0: # %entry +; X64-NEXT: cvtsi2ss %edi, %xmm0 +; X64-NEXT: retq +; +; AVX512-LABEL: test_si8_to_float: +; AVX512: # %bb.0: # %entry +; AVX512-NEXT: vcvtsi2ss %edi, %xmm0, %xmm0 +; AVX512-NEXT: retq +entry: + %conv = sitofp i8 %x to float + ret float %conv +} + +define float @test_si31_to_float(i31 %x) { +; X64-LABEL: test_si31_to_float: +; X64: # %bb.0: # %entry +; X64-NEXT: addl %edi, %edi +; X64-NEXT: sarl %edi +; X64-NEXT: cvtsi2ss %edi, %xmm0 +; X64-NEXT: retq +; +; AVX512-LABEL: test_si31_to_float: +; AVX512: # %bb.0: # %entry +; AVX512-NEXT: addl %edi, %edi +; AVX512-NEXT: sarl %edi +; AVX512-NEXT: vcvtsi2ss %edi, %xmm0, %xmm0 +; AVX512-NEXT: retq +entry: + %conv = sitofp i31 %x to float + ret float %conv +} + +define float @test_si33_to_float(i33 %x) { +; X64-LABEL: test_si33_to_float: +; X64: # %bb.0: # %entry +; X64-NEXT: shlq $31, %rdi +; X64-NEXT: sarq $31, %rdi +; X64-NEXT: cvtsi2ss 
%rdi, %xmm0
+; X64-NEXT:    retq
+;
+; AVX512-LABEL: test_si33_to_float:
+; AVX512:       # %bb.0: # %entry
+; AVX512-NEXT:    shlq $31, %rdi
+; AVX512-NEXT:    sarq $31, %rdi
+; AVX512-NEXT:    vcvtsi2ss %rdi, %xmm0, %xmm0
+; AVX512-NEXT:    retq
+entry:
+  %conv = sitofp i33 %x to float
+  ret float %conv
+}

From aea06684992873f70c5834e2f455f913e5b8d671 Mon Sep 17 00:00:00 2001
From: Vladislav Dzhidzhoev
Date: Wed, 25 Sep 2024 16:19:02 +0200
Subject: [PATCH 171/212] [lldb][test] Use tools from llvm instead of compiler tools (#109961)

In #102185, toolchain detection for API tests has been rewritten in
Python. Tool paths for tests there are determined from the compiler path.

Here, tools are taken from the `--llvm-tools-dir` dotest.py argument,
which by default refers to the LLVM build directory, unless they are
explicitly redefined in environment variables. This helps to minimize
external dependencies and to maximize the reproducibility of the build.
---
 .../Python/lldbsuite/test/builders/builder.py        | 12 ++++++++++--
 lldb/packages/Python/lldbsuite/test/configuration.py |  3 +++
 lldb/packages/Python/lldbsuite/test/dotest.py        |  1 +
 lldb/test/API/functionalities/archives/Makefile      |  2 --
 4 files changed, 14 insertions(+), 4 deletions(-)

diff --git a/lldb/packages/Python/lldbsuite/test/builders/builder.py b/lldb/packages/Python/lldbsuite/test/builders/builder.py
index 564918c58b6dd..e3099219e437e 100644
--- a/lldb/packages/Python/lldbsuite/test/builders/builder.py
+++ b/lldb/packages/Python/lldbsuite/test/builders/builder.py
@@ -110,6 +110,10 @@ def getToolchainSpec(self, compiler):
         if not cc:
             return []
 
+        exe_ext = ""
+        if lldbplatformutil.getHostPlatform() == "windows":
+            exe_ext = ".exe"
+
         cc = cc.strip()
         cc_path = pathlib.Path(cc)
 
@@ -149,9 +153,9 @@ def getToolchainSpec(self, compiler):
         cc_dir = cc_path.parent
 
         def getToolchainUtil(util_name):
-            return cc_dir / (cc_prefix + util_name + cc_ext)
+            return os.path.join(configuration.llvm_tools_dir, util_name + exe_ext)
 
-        cxx = getToolchainUtil(cxx_type)
+        cxx = cc_dir / (cc_prefix + cxx_type + cc_ext)
 
         util_names = {
             "OBJCOPY": "objcopy",
@@ -161,6 +165,10 @@ def getToolchainUtil(util_name):
         }
         utils = []
 
+        # Required by API TestBSDArchives.py tests.
+        if not os.getenv("LLVM_AR"):
+            utils.extend(["LLVM_AR=%s" % getToolchainUtil("llvm-ar")])
+
         if not lldbplatformutil.platformIsDarwin():
             if cc_type in ["clang", "cc", "gcc"]:
                 util_paths = {}
diff --git a/lldb/packages/Python/lldbsuite/test/configuration.py b/lldb/packages/Python/lldbsuite/test/configuration.py
index 27eef040497d1..1bacd74a968c3 100644
--- a/lldb/packages/Python/lldbsuite/test/configuration.py
+++ b/lldb/packages/Python/lldbsuite/test/configuration.py
@@ -118,6 +118,9 @@
 # same base name.
 all_tests = set()
 
+# Path to LLVM tools to be used by tests.
+llvm_tools_dir = None
+
 # LLDB library directory.
 lldb_libs_dir = None
 lldb_obj_root = None
diff --git a/lldb/packages/Python/lldbsuite/test/dotest.py b/lldb/packages/Python/lldbsuite/test/dotest.py
index f14a00a2394b0..b1ae896d3fd3b 100644
--- a/lldb/packages/Python/lldbsuite/test/dotest.py
+++ b/lldb/packages/Python/lldbsuite/test/dotest.py
@@ -280,6 +280,7 @@ def parseOptionsAndInitTestdirs():
             "xcrun -find -toolchain default dsymutil"
         )
     if args.llvm_tools_dir:
+        configuration.llvm_tools_dir = args.llvm_tools_dir
         configuration.filecheck = shutil.which("FileCheck", path=args.llvm_tools_dir)
         configuration.yaml2obj = shutil.which("yaml2obj", path=args.llvm_tools_dir)
 
diff --git a/lldb/test/API/functionalities/archives/Makefile b/lldb/test/API/functionalities/archives/Makefile
index c4c593e6db051..4b9696e26b575 100644
--- a/lldb/test/API/functionalities/archives/Makefile
+++ b/lldb/test/API/functionalities/archives/Makefile
@@ -12,12 +12,10 @@ libfoo.a: a.o b.o
 
 # This tests whether lldb can load a thin archive
 libbar.a: c.o
-	$(eval LLVM_AR := $(LLVM_TOOLS_DIR)/llvm-ar)
 	$(eval LLVM_ARFLAGS := -rcsDT)
 	$(LLVM_AR) $(LLVM_ARFLAGS) $@ $^
 
 libfoo-thin.a: a.o b.o
-	$(eval LLVM_AR := $(LLVM_TOOLS_DIR)/llvm-ar)
 	$(eval LLVM_ARFLAGS := -rcsUT)
 	$(LLVM_AR) $(LLVM_ARFLAGS) $@ $^

From 3469db82b5c821c94b58c0b81f03bbef51efa30b Mon Sep 17 00:00:00 2001
From: Alexey Bataev
Date: Wed, 25 Sep 2024 10:23:41 -0400
Subject: [PATCH 172/212] [SLP] Add subvector vectorization for non-load nodes

Previously, the SLP vectorizer supported clustered vectorization only for
loads. This patch adds support for "clustered" vectorization of other
instructions as well. If a buildvector node contains "clusters" that can
be vectorized separately and then inserted into the resulting
buildvector, it is better to do so, since it may reduce the cost of the
vector graph and produce better vector code.

The patch performs some analysis to decide whether this kind of extra
vectorization is profitable: it checks the scalar instructions and their
operands and vectorizes them only if doing so results in a better graph.

Reviewers: RKSimon

Reviewed By: RKSimon

Pull Request: https://github.com/llvm/llvm-project/pull/108430
---
 .../Transforms/Vectorize/SLPVectorizer.cpp    | 161 +++++++++++++++---
 .../SLPVectorizer/AArch64/loadorder.ll        |  24 +--
 .../vectorizable-selects-uniform-cmps.ll      |  10 +-
 .../buildvector-postpone-for-dependency.ll    |   6 +-
 .../SLPVectorizer/X86/landing_pad.ll          |  19 +--
 llvm/test/Transforms/SLPVectorizer/X86/phi.ll |  41 ++---
 .../SLPVectorizer/X86/reduction-logical.ll    |  11 +-
 .../Transforms/SLPVectorizer/X86/resched.ll   |  43 +++--
 8 files changed, 212 insertions(+), 103 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 414c6388c777b..3695a8082531c 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -1347,6 +1347,7 @@ class BoUpSLP {
     }
     MinBWs.clear();
     ReductionBitWidth = 0;
+    BaseGraphSize = 1;
     CastMaxMinBWSizes.reset();
     ExtraBitWidthNodes.clear();
     InstrElementSize.clear();
@@ -1355,11 +1356,10 @@ class BoUpSLP {
     ValueToGatherNodes.clear();
   }
 
-  unsigned getTreeSize() const {
-    return GatheredLoadsEntriesFirst == NoGatheredLoads
-               ? VectorizableTree.size()
-               : GatheredLoadsEntriesFirst;
-  }
+  unsigned getTreeSize() const { return VectorizableTree.size(); }
+
+  /// Returns the base graph size, before any transformations.
+ unsigned getCanonicalGraphSize() const { return BaseGraphSize; } /// Perform LICM and CSE on the newly generated gather sequences. void optimizeGatherSequence(); @@ -4191,6 +4191,9 @@ class BoUpSLP { /// reduction. unsigned ReductionBitWidth = 0; + /// Canonical graph size before the transformations. + unsigned BaseGraphSize = 1; + /// If the tree contains any zext/sext/trunc nodes, contains max-min pair of /// type sizes, used in the tree. std::optional> CastMaxMinBWSizes; @@ -9001,47 +9004,147 @@ getGEPCosts(const TargetTransformInfo &TTI, ArrayRef Ptrs, void BoUpSLP::transformNodes() { constexpr TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput; + BaseGraphSize = VectorizableTree.size(); + // Operands are profitable if they are: + // 1. At least one constant + // or + // 2. Splats + // or + // 3. Results in good vectorization opportunity, i.e. may generate vector + // nodes and reduce cost of the graph. + auto CheckOperandsProfitability = [this](Instruction *I1, Instruction *I2, + const InstructionsState &S) { + SmallVector>> Candidates; + for (unsigned Op : seq(S.MainOp->getNumOperands())) + Candidates.emplace_back().emplace_back(I1->getOperand(Op), + I2->getOperand(Op)); + return all_of( + Candidates, [this](ArrayRef> Cand) { + return all_of(Cand, + [](const std::pair &P) { + return isa(P.first) || + isa(P.second) || P.first == P.second; + }) || + findBestRootPair(Cand, LookAheadHeuristics::ScoreSplatLoads); + }); + }; // The tree may grow here, so iterate over nodes, built before. - for (unsigned Idx : seq(VectorizableTree.size())) { + for (unsigned Idx : seq(BaseGraphSize)) { TreeEntry &E = *VectorizableTree[Idx]; if (E.isGather()) { ArrayRef VL = E.Scalars; const unsigned Sz = getVectorElementSize(VL.front()); unsigned MinVF = getMinVF(2 * Sz); + // Do not try partial vectorization for small nodes (<= 2), nodes with the + // same opcode and same parent block or all constants. if (VL.size() <= 2 || - (E.getOpcode() && - (E.isAltShuffle() || E.getOpcode() != Instruction::Load))) + !(!E.getOpcode() || E.getOpcode() == Instruction::Load || + E.isAltShuffle() || !allSameBlock(VL)) || + allConstant(VL) || isSplat(VL)) continue; // Try to find vectorizable sequences and transform them into a series of // insertvector instructions. unsigned StartIdx = 0; unsigned End = VL.size(); - for (unsigned VF = VL.size() / 2; VF >= MinVF; VF /= 2) { + for (unsigned VF = VL.size() / 2; VF >= MinVF; VF = bit_ceil(VF) / 2) { + SmallVector Slices; for (unsigned Cnt = StartIdx; Cnt + VF <= End; Cnt += VF) { ArrayRef Slice = VL.slice(Cnt, VF); // If any instruction is vectorized already - do not try again. - if (getTreeEntry(Slice.front()) || getTreeEntry(Slice.back())) + // Reuse the existing node, if it fully matches the slice. + if (const TreeEntry *SE = getTreeEntry(Slice.front()); + SE || getTreeEntry(Slice.back())) { + if (!SE) + continue; + if (VF != SE->getVectorFactor() || !SE->isSame(Slice)) + continue; + } + // Constant already handled effectively - skip. + if (allConstant(Slice)) continue; - InstructionsState S = getSameOpcode(Slice, *TLI); - if (!S.getOpcode() || S.isAltShuffle() || - (S.getOpcode() != Instruction::Load && - any_of(Slice, [&](Value *V) { - return !areAllUsersVectorized(cast(V), - UserIgnoreList); - }))) + // Do not try to vectorize small splats (less than vector register and + // only with the single non-undef element). 
+ bool IsSplat = isSplat(Slice); + if (Slices.empty() || !IsSplat || + (VF <= 2 && 2 * std::clamp(TTI->getNumberOfParts(getWidenedType( + Slice.front()->getType(), VF)), + 1U, VF - 1) != + std::clamp(TTI->getNumberOfParts(getWidenedType( + Slice.front()->getType(), 2 * VF)), + 1U, 2 * VF)) || + count(Slice, Slice.front()) == + (isa(Slice.front()) ? VF - 1 : 1)) { + if (IsSplat) + continue; + InstructionsState S = getSameOpcode(Slice, *TLI); + if (!S.getOpcode() || S.isAltShuffle() || !allSameBlock(Slice)) + continue; + if (VF == 2) { + // Try to vectorize reduced values or if all users are vectorized. + // For expensive instructions extra extracts might be profitable. + if ((!UserIgnoreList || E.Idx != 0) && + TTI->getInstructionCost(cast(Slice.front()), + CostKind) < TTI::TCC_Expensive && + !all_of(Slice, [&](Value *V) { + return areAllUsersVectorized(cast(V), + UserIgnoreList); + })) + continue; + if (S.getOpcode() == Instruction::Load) { + OrdersType Order; + SmallVector PointerOps; + LoadsState Res = + canVectorizeLoads(Slice, Slice.front(), Order, PointerOps); + // Do not vectorize gathers. + if (Res == LoadsState::ScatterVectorize || + Res == LoadsState::Gather) + continue; + } else if (S.getOpcode() == Instruction::ExtractElement || + (TTI->getInstructionCost( + cast(Slice.front()), CostKind) < + TTI::TCC_Expensive && + !CheckOperandsProfitability( + cast(Slice.front()), + cast(Slice.back()), S))) { + // Do not vectorize extractelements (handled effectively + // alread). Do not vectorize non-profitable instructions (with + // low cost and non-vectorizable operands.) + continue; + } + } + } + Slices.emplace_back(Cnt); + } + auto AddCombinedNode = [&](unsigned Idx, unsigned Cnt) { + E.CombinedEntriesWithIndices.emplace_back(Idx, Cnt); + if (StartIdx == Cnt) + StartIdx = Cnt + VF; + if (End == Cnt + VF) + End = Cnt; + }; + for (unsigned Cnt : Slices) { + ArrayRef Slice = VL.slice(Cnt, VF); + // If any instruction is vectorized already - do not try again. 
+ if (const TreeEntry *SE = getTreeEntry(Slice.front()); + SE || getTreeEntry(Slice.back())) { + if (!SE) + continue; + if (VF != SE->getVectorFactor() || !SE->isSame(Slice)) + continue; + AddCombinedNode(SE->Idx, Cnt); continue; + } unsigned PrevSize = VectorizableTree.size(); buildTree_rec(Slice, 0, EdgeInfo(&E, UINT_MAX)); if (PrevSize + 1 == VectorizableTree.size() && - VectorizableTree[PrevSize]->isGather()) { + VectorizableTree[PrevSize]->isGather() && + VectorizableTree[PrevSize]->getOpcode() != + Instruction::ExtractElement && + !isSplat(Slice)) { VectorizableTree.pop_back(); continue; } - E.CombinedEntriesWithIndices.emplace_back(PrevSize, Cnt); - if (StartIdx == Cnt) - StartIdx = Cnt + VF; - if (End == Cnt + VF) - End = Cnt; + AddCombinedNode(PrevSize, Cnt); } } } @@ -12293,6 +12396,14 @@ BoUpSLP::isGatherShuffledEntry( "Expected only single user of the gather node."); assert(VL.size() % NumParts == 0 && "Number of scalars must be divisible by NumParts."); + if (!TE->UserTreeIndices.empty() && + TE->UserTreeIndices.front().UserTE->isGather() && + TE->UserTreeIndices.front().EdgeIdx == UINT_MAX) { + assert((TE->Idx == 0 || TE->getOpcode() == Instruction::ExtractElement || + isSplat(TE->Scalars)) && + "Expected splat or extractelements only node."); + return {}; + } unsigned SliceSize = getPartNumElems(VL.size(), NumParts); SmallVector> Res; for (unsigned Part : seq(NumParts)) { @@ -17119,7 +17230,7 @@ SLPVectorizerPass::vectorizeStoreChain(ArrayRef Chain, BoUpSLP &R, if (R.isGathered(Chain.front()) || R.isNotScheduled(cast(Chain.front())->getValueOperand())) return std::nullopt; - Size = R.getTreeSize(); + Size = R.getCanonicalGraphSize(); return false; } R.reorderTopToBottom(); @@ -17129,7 +17240,7 @@ SLPVectorizerPass::vectorizeStoreChain(ArrayRef Chain, BoUpSLP &R, R.computeMinimumValueSizes(); - Size = R.getTreeSize(); + Size = R.getCanonicalGraphSize(); if (S.getOpcode() == Instruction::Load) Size = 2; // cut off masked gather small trees InstructionCost Cost = R.getTreeCost(); diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/loadorder.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/loadorder.ll index 5b878108af59a..5f0b16048d40c 100644 --- a/llvm/test/Transforms/SLPVectorizer/AArch64/loadorder.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/loadorder.ll @@ -685,10 +685,10 @@ define void @store_blockstrided3(ptr nocapture noundef readonly %x, ptr nocaptur ; CHECK-NEXT: [[MUL:%.*]] = shl nsw i32 [[STRIDE]], 1 ; CHECK-NEXT: [[IDXPROM11:%.*]] = sext i32 [[MUL]] to i64 ; CHECK-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds i32, ptr [[X]], i64 [[IDXPROM11]] -; CHECK-NEXT: [[ADD18:%.*]] = add nsw i32 [[MUL]], 2 -; CHECK-NEXT: [[IDXPROM19:%.*]] = sext i32 [[ADD18]] to i64 -; CHECK-NEXT: [[ARRAYIDX20:%.*]] = getelementptr inbounds i32, ptr [[X]], i64 [[IDXPROM19]] -; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX20]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX12]], align 4 +; CHECK-NEXT: [[ADD14:%.*]] = or disjoint i32 [[MUL]], 1 +; CHECK-NEXT: [[IDXPROM15:%.*]] = sext i32 [[ADD14]] to i64 +; CHECK-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds i32, ptr [[X]], i64 [[IDXPROM15]] ; CHECK-NEXT: [[MUL21:%.*]] = mul nsw i32 [[STRIDE]], 3 ; CHECK-NEXT: [[IDXPROM23:%.*]] = sext i32 [[MUL21]] to i64 ; CHECK-NEXT: [[ARRAYIDX24:%.*]] = getelementptr inbounds i32, ptr [[X]], i64 [[IDXPROM23]] @@ -700,8 +700,8 @@ define void @store_blockstrided3(ptr nocapture noundef readonly %x, ptr nocaptur ; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[ARRAYIDX35]], 
align 4 ; CHECK-NEXT: [[ARRAYIDX41:%.*]] = getelementptr inbounds i32, ptr [[Y]], i64 [[IDXPROM5]] ; CHECK-NEXT: [[ARRAYIDX48:%.*]] = getelementptr inbounds i32, ptr [[Y]], i64 [[IDXPROM11]] -; CHECK-NEXT: [[ARRAYIDX56:%.*]] = getelementptr inbounds i32, ptr [[Y]], i64 [[IDXPROM19]] -; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[ARRAYIDX56]], align 4 +; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[ARRAYIDX48]], align 4 +; CHECK-NEXT: [[ARRAYIDX52:%.*]] = getelementptr inbounds i32, ptr [[Y]], i64 [[IDXPROM15]] ; CHECK-NEXT: [[ARRAYIDX60:%.*]] = getelementptr inbounds i32, ptr [[Y]], i64 [[IDXPROM23]] ; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX60]], align 4 ; CHECK-NEXT: [[ARRAYIDX64:%.*]] = getelementptr inbounds i32, ptr [[Y]], i64 [[IDXPROM27]] @@ -715,12 +715,12 @@ define void @store_blockstrided3(ptr nocapture noundef readonly %x, ptr nocaptur ; CHECK-NEXT: [[TMP10:%.*]] = mul nsw <2 x i32> [[TMP8]], [[TMP6]] ; CHECK-NEXT: [[TMP11:%.*]] = mul nsw <2 x i32> [[TMP9]], [[TMP7]] ; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <2 x i32> [[TMP10]], <2 x i32> [[TMP11]], <4 x i32> -; CHECK-NEXT: [[ARRAYIDX84:%.*]] = getelementptr inbounds i8, ptr [[Z]], i64 28 -; CHECK-NEXT: [[TMP13:%.*]] = load <2 x i32>, ptr [[ARRAYIDX12]], align 4 -; CHECK-NEXT: [[TMP14:%.*]] = load <2 x i32>, ptr [[ARRAYIDX48]], align 4 +; CHECK-NEXT: [[MUL81:%.*]] = mul nsw i32 [[TMP4]], [[TMP1]] +; CHECK-NEXT: [[ARRAYIDX82:%.*]] = getelementptr inbounds i8, ptr [[Z]], i64 32 +; CHECK-NEXT: [[TMP13:%.*]] = load <2 x i32>, ptr [[ARRAYIDX16]], align 4 +; CHECK-NEXT: [[TMP14:%.*]] = load <2 x i32>, ptr [[ARRAYIDX52]], align 4 ; CHECK-NEXT: [[TMP15:%.*]] = mul nsw <2 x i32> [[TMP14]], [[TMP13]] ; CHECK-NEXT: [[TMP16:%.*]] = shufflevector <2 x i32> [[TMP15]], <2 x i32> poison, <2 x i32> -; CHECK-NEXT: [[MUL85:%.*]] = mul nsw i32 [[TMP4]], [[TMP1]] ; CHECK-NEXT: [[MUL87:%.*]] = mul nsw i32 [[TMP5]], [[TMP2]] ; CHECK-NEXT: [[ARRAYIDX88:%.*]] = getelementptr inbounds i8, ptr [[Z]], i64 44 ; CHECK-NEXT: [[ARRAYIDX92:%.*]] = getelementptr inbounds i8, ptr [[Z]], i64 36 @@ -728,8 +728,8 @@ define void @store_blockstrided3(ptr nocapture noundef readonly %x, ptr nocaptur ; CHECK-NEXT: [[TMP18:%.*]] = load <2 x i32>, ptr [[ARRAYIDX64]], align 4 ; CHECK-NEXT: store i32 [[MUL73]], ptr [[Z]], align 4 ; CHECK-NEXT: store <4 x i32> [[TMP12]], ptr [[ARRAYIDX72]], align 4 -; CHECK-NEXT: store <2 x i32> [[TMP16]], ptr [[ARRAYIDX84]], align 4 -; CHECK-NEXT: store i32 [[MUL85]], ptr [[ARRAYIDX76]], align 4 +; CHECK-NEXT: store i32 [[MUL81]], ptr [[ARRAYIDX82]], align 4 +; CHECK-NEXT: store <2 x i32> [[TMP16]], ptr [[ARRAYIDX76]], align 4 ; CHECK-NEXT: store i32 [[MUL87]], ptr [[ARRAYIDX88]], align 4 ; CHECK-NEXT: [[TMP19:%.*]] = mul nsw <2 x i32> [[TMP18]], [[TMP17]] ; CHECK-NEXT: [[TMP20:%.*]] = shufflevector <2 x i32> [[TMP19]], <2 x i32> poison, <2 x i32> diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/vectorizable-selects-uniform-cmps.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/vectorizable-selects-uniform-cmps.ll index 9c086abe216c0..0fe4e6a5aa28b 100644 --- a/llvm/test/Transforms/SLPVectorizer/AArch64/vectorizable-selects-uniform-cmps.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/vectorizable-selects-uniform-cmps.ll @@ -259,10 +259,12 @@ define void @select_uniform_ugt_16xi8(ptr %ptr, i8 %x) { ; CHECK-NEXT: [[TMP7:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v8i8(<16 x i8> [[TMP6]], <8 x i8> [[TMP0]], i64 0) ; CHECK-NEXT: [[TMP8:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v4i8(<16 x i8> [[TMP7]], <4 x i8> 
[[TMP3]], i64 12) ; CHECK-NEXT: [[TMP9:%.*]] = icmp ugt <16 x i8> [[TMP8]], -; CHECK-NEXT: [[TMP10:%.*]] = insertelement <16 x i8> poison, i8 [[X]], i32 0 -; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <16 x i8> [[TMP10]], <16 x i8> poison, <16 x i32> zeroinitializer -; CHECK-NEXT: [[TMP12:%.*]] = select <16 x i1> [[TMP9]], <16 x i8> [[TMP8]], <16 x i8> [[TMP11]] -; CHECK-NEXT: store <16 x i8> [[TMP12]], ptr [[PTR]], align 2 +; CHECK-NEXT: [[TMP10:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v8i8(<16 x i8> [[TMP8]], <8 x i8> [[TMP0]], i64 0) +; CHECK-NEXT: [[TMP11:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v4i8(<16 x i8> [[TMP10]], <4 x i8> [[TMP3]], i64 12) +; CHECK-NEXT: [[TMP12:%.*]] = insertelement <16 x i8> poison, i8 [[X]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <16 x i8> [[TMP12]], <16 x i8> poison, <16 x i32> zeroinitializer +; CHECK-NEXT: [[TMP14:%.*]] = select <16 x i1> [[TMP9]], <16 x i8> [[TMP11]], <16 x i8> [[TMP13]] +; CHECK-NEXT: store <16 x i8> [[TMP14]], ptr [[PTR]], align 2 ; CHECK-NEXT: ret void ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/buildvector-postpone-for-dependency.ll b/llvm/test/Transforms/SLPVectorizer/X86/buildvector-postpone-for-dependency.ll index 9c22295a1c718..43c42c1ea2bfb 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/buildvector-postpone-for-dependency.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/buildvector-postpone-for-dependency.ll @@ -12,12 +12,12 @@ define void @test() { ; CHECK-NEXT: ret void ; CHECK: [[BB6]]: ; CHECK-NEXT: [[TMP1:%.*]] = phi <2 x i32> [ zeroinitializer, %[[BB]] ], [ [[TMP8:%.*]], %[[BB6]] ] -; CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.vector.insert.v4i32.v2i32(<4 x i32> , <2 x i32> [[TMP1]], i64 2) +; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> , <4 x i32> [[TMP6]], <4 x i32> ; CHECK-NEXT: [[TMP3:%.*]] = ashr <4 x i32> zeroinitializer, [[TMP2]] ; CHECK-NEXT: [[TMP4:%.*]] = mul <4 x i32> zeroinitializer, [[TMP2]] ; CHECK-NEXT: [[TMP5]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP4]], <4 x i32> -; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <2 x i32> -; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x i32> [[TMP6]], <2 x i32> , <2 x i32> +; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> , <2 x i32> ; CHECK-NEXT: [[TMP8]] = mul <2 x i32> zeroinitializer, [[TMP7]] ; CHECK-NEXT: br i1 false, label %[[BB2]], label %[[BB6]] ; diff --git a/llvm/test/Transforms/SLPVectorizer/X86/landing_pad.ll b/llvm/test/Transforms/SLPVectorizer/X86/landing_pad.ll index 813c5e7418b30..47b42bc8f32a7 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/landing_pad.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/landing_pad.ll @@ -10,10 +10,10 @@ define void @foo() personality ptr @bar { ; CHECK: bb2.loopexit: ; CHECK-NEXT: br label [[BB2:%.*]] ; CHECK: bb2: -; CHECK-NEXT: [[TMP0:%.*]] = phi <4 x i32> [ [[TMP8:%.*]], [[BB9:%.*]] ], [ poison, [[BB2_LOOPEXIT:%.*]] ] +; CHECK-NEXT: [[TMP0:%.*]] = phi <4 x i32> [ [[TMP7:%.*]], [[BB9:%.*]] ], [ poison, [[BB2_LOOPEXIT:%.*]] ] ; CHECK-NEXT: ret void ; CHECK: bb3: -; CHECK-NEXT: [[TMP1:%.*]] = phi <2 x i32> [ [[TMP4:%.*]], [[BB6:%.*]] ], [ poison, [[BB1:%.*]] ] +; CHECK-NEXT: [[TMP1:%.*]] = phi <2 x i32> [ [[TMP3:%.*]], [[BB6:%.*]] ], [ poison, [[BB1:%.*]] ] ; CHECK-NEXT: [[TMP2:%.*]] = invoke i32 poison(ptr addrspace(1) nonnull poison, i32 0, i32 0, i32 poison) [ "deopt"() ] ; CHECK-NEXT: to label 
[[BB4:%.*]] unwind label [[BB10:%.*]] ; CHECK: bb4: @@ -21,30 +21,29 @@ define void @foo() personality ptr @bar { ; CHECK: bb5: ; CHECK-NEXT: br label [[BB7:%.*]] ; CHECK: bb6: -; CHECK-NEXT: [[TMP3:%.*]] = phi <2 x i32> [ , [[BB8:%.*]] ] -; CHECK-NEXT: [[TMP4]] = shufflevector <2 x i32> [[TMP3]], <2 x i32> poison, <2 x i32> +; CHECK-NEXT: [[TMP3]] = phi <2 x i32> [ , [[BB8:%.*]] ] ; CHECK-NEXT: br label [[BB3]] ; CHECK: bb7: ; CHECK-NEXT: [[LOCAL_5_84111:%.*]] = phi i32 [ poison, [[BB8]] ], [ poison, [[BB5]] ] -; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x i32> poison, i32 [[LOCAL_5_84111]], i32 1 -; CHECK-NEXT: [[TMP6:%.*]] = invoke i32 poison(ptr addrspace(1) nonnull poison, i32 poison, i32 poison, i32 poison) [ "deopt"() ] +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x i32> poison, i32 [[LOCAL_5_84111]], i32 0 +; CHECK-NEXT: [[TMP5:%.*]] = invoke i32 poison(ptr addrspace(1) nonnull poison, i32 poison, i32 poison, i32 poison) [ "deopt"() ] ; CHECK-NEXT: to label [[BB8]] unwind label [[BB12:%.*]] ; CHECK: bb8: ; CHECK-NEXT: br i1 poison, label [[BB7]], label [[BB6]] ; CHECK: bb9: ; CHECK-NEXT: [[INDVARS_IV528799:%.*]] = phi i64 [ poison, [[BB10]] ], [ poison, [[BB12]] ] -; CHECK-NEXT: [[TMP7:%.*]] = phi <2 x i32> [ [[TMP9:%.*]], [[BB10]] ], [ [[TMP10:%.*]], [[BB12]] ] -; CHECK-NEXT: [[TMP8]] = call <4 x i32> @llvm.vector.insert.v4i32.v2i32(<4 x i32> poison, <2 x i32> [[TMP7]], i64 2) +; CHECK-NEXT: [[TMP6:%.*]] = phi <2 x i32> [ [[TMP8:%.*]], [[BB10]] ], [ [[TMP9:%.*]], [[BB12]] ] +; CHECK-NEXT: [[TMP7]] = shufflevector <2 x i32> [[TMP6]], <2 x i32> poison, <4 x i32> ; CHECK-NEXT: br label [[BB2]] ; CHECK: bb10: -; CHECK-NEXT: [[TMP9]] = phi <2 x i32> [ [[TMP1]], [[BB3]] ] +; CHECK-NEXT: [[TMP8]] = phi <2 x i32> [ [[TMP1]], [[BB3]] ] ; CHECK-NEXT: [[LANDING_PAD68:%.*]] = landingpad { ptr, i32 } ; CHECK-NEXT: cleanup ; CHECK-NEXT: br label [[BB9]] ; CHECK: bb11: ; CHECK-NEXT: ret void ; CHECK: bb12: -; CHECK-NEXT: [[TMP10]] = phi <2 x i32> [ [[TMP5]], [[BB7]] ] +; CHECK-NEXT: [[TMP9]] = phi <2 x i32> [ [[TMP4]], [[BB7]] ] ; CHECK-NEXT: [[LANDING_PAD149:%.*]] = landingpad { ptr, i32 } ; CHECK-NEXT: cleanup ; CHECK-NEXT: br label [[BB9]] diff --git a/llvm/test/Transforms/SLPVectorizer/X86/phi.ll b/llvm/test/Transforms/SLPVectorizer/X86/phi.ll index 7201583f3450e..ec8bcc85e7db0 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/phi.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/phi.ll @@ -144,35 +144,36 @@ define float @foo3(ptr nocapture readonly %A) #0 { ; CHECK: for.body: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] ; CHECK-NEXT: [[R_052:%.*]] = phi float [ [[TMP2]], [[ENTRY]] ], [ [[ADD6:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[TMP3:%.*]] = phi <4 x float> [ [[TMP1]], [[ENTRY]] ], [ [[TMP14:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[TMP4:%.*]] = phi <2 x float> [ [[TMP0]], [[ENTRY]] ], [ [[TMP9:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[TMP3:%.*]] = phi <4 x float> [ [[TMP1]], [[ENTRY]] ], [ [[TMP15:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[TMP4:%.*]] = phi <2 x float> [ [[TMP0]], [[ENTRY]] ], [ [[TMP7:%.*]], [[FOR_BODY]] ] ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x float> [[TMP4]], i32 0 ; CHECK-NEXT: [[MUL:%.*]] = fmul float [[TMP5]], 7.000000e+00 ; CHECK-NEXT: [[ADD6]] = fadd float [[R_052]], [[MUL]] ; CHECK-NEXT: [[TMP6:%.*]] = add nsw i64 [[INDVARS_IV]], 2 ; CHECK-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP6]] -; CHECK-NEXT: [[TMP7:%.*]] = load float, ptr [[ARRAYIDX14]], align 4 ; 
CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 3 ; CHECK-NEXT: [[ARRAYIDX19:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV_NEXT]] -; CHECK-NEXT: [[TMP8:%.*]] = load <2 x float>, ptr [[ARRAYIDX19]], align 4 -; CHECK-NEXT: [[TMP9]] = load <2 x float>, ptr [[ARRAYIDX19]], align 4 -; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> -; CHECK-NEXT: [[TMP11:%.*]] = insertelement <4 x float> [[TMP10]], float [[TMP7]], i32 1 -; CHECK-NEXT: [[TMP12:%.*]] = call <4 x float> @llvm.vector.insert.v4f32.v2f32(<4 x float> [[TMP11]], <2 x float> [[TMP8]], i64 2) -; CHECK-NEXT: [[TMP13:%.*]] = fmul <4 x float> [[TMP12]], -; CHECK-NEXT: [[TMP14]] = fadd <4 x float> [[TMP3]], [[TMP13]] -; CHECK-NEXT: [[TMP15:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 -; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP15]], 121 +; CHECK-NEXT: [[TMP7]] = load <2 x float>, ptr [[ARRAYIDX19]], align 4 +; CHECK-NEXT: [[TMP8:%.*]] = load <2 x float>, ptr [[ARRAYIDX14]], align 4 +; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <2 x float> [[TMP8]], <2 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <4 x float> [[TMP9]], <4 x float> [[TMP10]], <4 x i32> +; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <2 x float> [[TMP7]], <2 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <4 x float> [[TMP11]], <4 x float> [[TMP12]], <4 x i32> +; CHECK-NEXT: [[TMP14:%.*]] = fmul <4 x float> [[TMP13]], +; CHECK-NEXT: [[TMP15]] = fadd <4 x float> [[TMP3]], [[TMP14]] +; CHECK-NEXT: [[TMP16:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP16]], 121 ; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]] ; CHECK: for.end: -; CHECK-NEXT: [[TMP16:%.*]] = extractelement <4 x float> [[TMP14]], i32 0 -; CHECK-NEXT: [[ADD28:%.*]] = fadd float [[ADD6]], [[TMP16]] -; CHECK-NEXT: [[TMP17:%.*]] = extractelement <4 x float> [[TMP14]], i32 1 -; CHECK-NEXT: [[ADD29:%.*]] = fadd float [[ADD28]], [[TMP17]] -; CHECK-NEXT: [[TMP18:%.*]] = extractelement <4 x float> [[TMP14]], i32 2 -; CHECK-NEXT: [[ADD30:%.*]] = fadd float [[ADD29]], [[TMP18]] -; CHECK-NEXT: [[TMP19:%.*]] = extractelement <4 x float> [[TMP14]], i32 3 -; CHECK-NEXT: [[ADD31:%.*]] = fadd float [[ADD30]], [[TMP19]] +; CHECK-NEXT: [[TMP17:%.*]] = extractelement <4 x float> [[TMP15]], i32 0 +; CHECK-NEXT: [[ADD28:%.*]] = fadd float [[ADD6]], [[TMP17]] +; CHECK-NEXT: [[TMP18:%.*]] = extractelement <4 x float> [[TMP15]], i32 1 +; CHECK-NEXT: [[ADD29:%.*]] = fadd float [[ADD28]], [[TMP18]] +; CHECK-NEXT: [[TMP19:%.*]] = extractelement <4 x float> [[TMP15]], i32 2 +; CHECK-NEXT: [[ADD30:%.*]] = fadd float [[ADD29]], [[TMP19]] +; CHECK-NEXT: [[TMP20:%.*]] = extractelement <4 x float> [[TMP15]], i32 3 +; CHECK-NEXT: [[ADD31:%.*]] = fadd float [[ADD30]], [[TMP20]] ; CHECK-NEXT: ret float [[ADD31]] ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reduction-logical.ll b/llvm/test/Transforms/SLPVectorizer/X86/reduction-logical.ll index 12389f4a3dbf4..6200e3ae43fc9 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/reduction-logical.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/reduction-logical.ll @@ -315,11 +315,12 @@ define i1 @logical_and_icmp_clamp_extra_use_select(<4 x i32> %x) { define i1 @logical_and_icmp_clamp_v8i32(<8 x i32> %x, <8 x i32> %y) { ; CHECK-LABEL: @logical_and_icmp_clamp_v8i32( ; CHECK-NEXT: [[TMP1:%.*]] = 
shufflevector <8 x i32> [[X:%.*]], <8 x i32> poison, <8 x i32> -; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[Y:%.*]], <8 x i32> , <8 x i32> -; CHECK-NEXT: [[TMP3:%.*]] = icmp slt <8 x i32> [[TMP1]], [[TMP2]] -; CHECK-NEXT: [[TMP4:%.*]] = freeze <8 x i1> [[TMP3]] -; CHECK-NEXT: [[TMP5:%.*]] = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> [[TMP4]]) -; CHECK-NEXT: ret i1 [[TMP5]] +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[Y:%.*]], <8 x i32> poison, <4 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i32> @llvm.vector.insert.v8i32.v4i32(<8 x i32> , <4 x i32> [[TMP2]], i64 4) +; CHECK-NEXT: [[TMP4:%.*]] = icmp slt <8 x i32> [[TMP1]], [[TMP3]] +; CHECK-NEXT: [[TMP5:%.*]] = freeze <8 x i1> [[TMP4]] +; CHECK-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> [[TMP5]]) +; CHECK-NEXT: ret i1 [[TMP6]] ; %x0 = extractelement <8 x i32> %x, i32 0 %x1 = extractelement <8 x i32> %x, i32 1 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/resched.ll b/llvm/test/Transforms/SLPVectorizer/X86/resched.ll index 4ed52247c2ef3..b79ba458ef706 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/resched.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/resched.ll @@ -12,30 +12,25 @@ define fastcc void @_ZN12_GLOBAL__N_127PolynomialMultiplyRecognize9recognizeEv() ; CHECK-NEXT: [[SUB_I:%.*]] = add nsw i32 undef, -1 ; CHECK-NEXT: [[CONV31_I:%.*]] = and i32 undef, [[SUB_I]] ; CHECK-NEXT: [[SHR_I_I:%.*]] = lshr i32 [[CONV31_I]], 1 -; CHECK-NEXT: [[SHR_1_I_I:%.*]] = lshr i32 [[CONV31_I]], 2 -; CHECK-NEXT: [[SHR_2_I_I:%.*]] = lshr i32 [[CONV31_I]], 3 -; CHECK-NEXT: [[TMP0:%.*]] = trunc i32 [[SUB_I]] to i8 -; CHECK-NEXT: [[TMP1:%.*]] = insertelement <16 x i8> poison, i8 [[TMP0]], i32 0 -; CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[SHR_I_I]] to i8 -; CHECK-NEXT: [[TMP3:%.*]] = insertelement <16 x i8> [[TMP1]], i8 [[TMP2]], i32 1 -; CHECK-NEXT: [[TMP4:%.*]] = trunc i32 [[SHR_1_I_I]] to i8 -; CHECK-NEXT: [[TMP5:%.*]] = insertelement <16 x i8> [[TMP3]], i8 [[TMP4]], i32 2 -; CHECK-NEXT: [[TMP6:%.*]] = trunc i32 [[SHR_2_I_I]] to i8 -; CHECK-NEXT: [[TMP7:%.*]] = insertelement <16 x i8> [[TMP5]], i8 [[TMP6]], i32 3 -; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x i32> poison, i32 [[CONV31_I]], i32 0 -; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <4 x i32> [[TMP8]], <4 x i32> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP10:%.*]] = lshr <4 x i32> [[TMP9]], -; CHECK-NEXT: [[TMP11:%.*]] = trunc <4 x i32> [[TMP10]] to <4 x i8> -; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <4 x i8> [[TMP11]], <4 x i8> poison, <16 x i32> -; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <16 x i8> [[TMP7]], <16 x i8> [[TMP12]], <16 x i32> -; CHECK-NEXT: [[TMP14:%.*]] = insertelement <8 x i32> poison, i32 [[CONV31_I]], i32 0 -; CHECK-NEXT: [[TMP15:%.*]] = shufflevector <8 x i32> [[TMP14]], <8 x i32> poison, <8 x i32> zeroinitializer -; CHECK-NEXT: [[TMP16:%.*]] = lshr <8 x i32> [[TMP15]], -; CHECK-NEXT: [[TMP17:%.*]] = trunc <8 x i32> [[TMP16]] to <8 x i8> -; CHECK-NEXT: [[TMP18:%.*]] = shufflevector <8 x i8> [[TMP17]], <8 x i8> poison, <16 x i32> -; CHECK-NEXT: [[TMP19:%.*]] = shufflevector <16 x i8> [[TMP13]], <16 x i8> [[TMP18]], <16 x i32> -; CHECK-NEXT: [[TMP20:%.*]] = and <16 x i8> [[TMP19]], -; CHECK-NEXT: store <16 x i8> [[TMP20]], ptr undef, align 1 +; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x i32> poison, i32 [[CONV31_I]], i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x i32> [[TMP0]], <2 x i32> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP2:%.*]] = lshr <2 x i32> [[TMP1]], +; CHECK-NEXT: 
[[TMP3:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP4:%.*]] = lshr <4 x i32> [[TMP3]],
+; CHECK-NEXT:    [[TMP5:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> poison, <8 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP6:%.*]] = lshr <8 x i32> [[TMP5]],
+; CHECK-NEXT:    [[TMP7:%.*]] = trunc i32 [[SUB_I]] to i8
+; CHECK-NEXT:    [[TMP8:%.*]] = insertelement <16 x i8> poison, i8 [[TMP7]], i32 0
+; CHECK-NEXT:    [[TMP9:%.*]] = trunc i32 [[SHR_I_I]] to i8
+; CHECK-NEXT:    [[TMP10:%.*]] = insertelement <16 x i8> [[TMP8]], i8 [[TMP9]], i32 1
+; CHECK-NEXT:    [[TMP11:%.*]] = trunc <8 x i32> [[TMP6]] to <8 x i8>
+; CHECK-NEXT:    [[TMP12:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v8i8(<16 x i8> [[TMP10]], <8 x i8> [[TMP11]], i64 8)
+; CHECK-NEXT:    [[TMP13:%.*]] = trunc <4 x i32> [[TMP4]] to <4 x i8>
+; CHECK-NEXT:    [[TMP14:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v4i8(<16 x i8> [[TMP12]], <4 x i8> [[TMP13]], i64 4)
+; CHECK-NEXT:    [[TMP15:%.*]] = trunc <2 x i32> [[TMP2]] to <2 x i8>
+; CHECK-NEXT:    [[TMP16:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v2i8(<16 x i8> [[TMP14]], <2 x i8> [[TMP15]], i64 2)
+; CHECK-NEXT:    [[TMP17:%.*]] = and <16 x i8> [[TMP16]],
+; CHECK-NEXT:    store <16 x i8> [[TMP17]], ptr undef, align 1
 ; CHECK-NEXT:    unreachable
 ; CHECK:       if.end50.i:
 ; CHECK-NEXT:    ret void

From d2885743630fcb35fdf64d21bd4bec62a5cb4d37 Mon Sep 17 00:00:00 2001
From: Philip Reames
Date: Wed, 25 Sep 2024 07:25:57 -0700
Subject: [PATCH 173/212] [TTI][RISCV] Model cost of loading constant arms of selects and compares (#109824)

This follows in the spirit of 7d82c99403f615f6236334e698720bf979959704,
and extends the costing API for compares and selects to provide
information about the operands in an analogous manner. This allows us to
model the cost of materializing the vector constants, as some
select-of-constants are significantly more expensive than others when you
account for the cost of materializing the constants involved.

This is a stepping stone towards fixing
https://github.com/llvm/llvm-project/issues/109466. A separate SLP patch
will be required to utilize the new API.
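
As a rough illustration of the extended interface (an editor's sketch, not
code from this patch), a caller that wants the cost of a select whose arms
are constants can now describe both value operands, letting the target fold
the cost of materializing those constants into the returned value. The
helper name below is hypothetical, and `TTI` is assumed to be an
already-initialized TargetTransformInfo for the function under analysis:

  #include "llvm/Analysis/TargetTransformInfo.h"
  #include "llvm/IR/Instructions.h"

  using namespace llvm;

  // Cost query for a `select`, forwarding operand information for the two
  // *value* operands (not the condition), as the updated documentation
  // specifies. Constant arms are reported as constant operand kinds, so a
  // target can account for materializing them.
  static InstructionCost getSelectCost(const TargetTransformInfo &TTI,
                                       const SelectInst *Sel) {
    TargetTransformInfo::OperandValueInfo Op1Info =
        TargetTransformInfo::getOperandInfo(Sel->getTrueValue());
    TargetTransformInfo::OperandValueInfo Op2Info =
        TargetTransformInfo::getOperandInfo(Sel->getFalseValue());
    return TTI.getCmpSelInstrCost(Instruction::Select, Sel->getType(),
                                  Sel->getCondition()->getType(),
                                  CmpInst::BAD_ICMP_PREDICATE,
                                  TargetTransformInfo::TCK_RecipThroughput,
                                  Op1Info, Op2Info, Sel);
  }

With the operand kinds forwarded, a target such as RISC-V can price a
select between two non-trivial constant vectors higher than a select
against zero, which is what the updated rvv-select.ll costs reflect.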
--- .../llvm/Analysis/TargetTransformInfo.h | 21 +++-- .../llvm/Analysis/TargetTransformInfoImpl.h | 10 ++- llvm/include/llvm/CodeGen/BasicTTIImpl.h | 17 ++-- llvm/lib/Analysis/TargetTransformInfo.cpp | 7 +- .../AArch64/AArch64TargetTransformInfo.cpp | 14 ++-- .../AArch64/AArch64TargetTransformInfo.h | 10 ++- .../lib/Target/ARM/ARMTargetTransformInfo.cpp | 15 ++-- llvm/lib/Target/ARM/ARMTargetTransformInfo.h | 10 ++- llvm/lib/Target/BPF/BPFTargetTransformInfo.h | 12 +-- .../Hexagon/HexagonTargetTransformInfo.cpp | 12 +-- .../Hexagon/HexagonTargetTransformInfo.h | 10 ++- .../Target/PowerPC/PPCTargetTransformInfo.cpp | 13 ++- .../Target/PowerPC/PPCTargetTransformInfo.h | 10 ++- .../Target/RISCV/RISCVTargetTransformInfo.cpp | 79 ++++++++++++------- .../Target/RISCV/RISCVTargetTransformInfo.h | 10 ++- .../SystemZ/SystemZTargetTransformInfo.cpp | 15 ++-- .../SystemZ/SystemZTargetTransformInfo.h | 10 ++- .../lib/Target/X86/X86TargetTransformInfo.cpp | 20 ++--- llvm/lib/Target/X86/X86TargetTransformInfo.h | 10 ++- .../Transforms/Vectorize/LoopVectorize.cpp | 6 +- .../Transforms/Vectorize/SLPVectorizer.cpp | 23 ++++-- .../lib/Transforms/Vectorize/VPlanRecipes.cpp | 4 +- .../Analysis/CostModel/RISCV/rvv-select.ll | 8 +- 23 files changed, 208 insertions(+), 138 deletions(-) diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h index cd69a8a371b6e..89a85bc8a9086 100644 --- a/llvm/include/llvm/Analysis/TargetTransformInfo.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h @@ -1371,11 +1371,15 @@ class TargetTransformInfo { /// is an existing instruction that holds Opcode, it may be passed in the /// 'I' parameter. The \p VecPred parameter can be used to indicate the select /// is using a compare with the specified predicate as condition. When vector - /// types are passed, \p VecPred must be used for all lanes. + /// types are passed, \p VecPred must be used for all lanes. For a + /// comparison, the two operands are the natural values. For a select, the + /// two operands are the *value* operands, not the condition operand. InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput, + OperandValueInfo Op1Info = {OK_AnyValue, OP_None}, + OperandValueInfo Op2Info = {OK_AnyValue, OP_None}, const Instruction *I = nullptr) const; /// \return The expected cost of vector Insert and Extract. 
@@ -2049,11 +2053,11 @@ class TargetTransformInfo::Concept { virtual InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind, const Instruction *I = nullptr) = 0; - virtual InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, - Type *CondTy, - CmpInst::Predicate VecPred, - TTI::TargetCostKind CostKind, - const Instruction *I) = 0; + virtual InstructionCost + getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, + CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, + OperandValueInfo Op1Info, OperandValueInfo Op2Info, + const Instruction *I) = 0; virtual InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index, Value *Op0, @@ -2710,8 +2714,11 @@ class TargetTransformInfo::Model final : public TargetTransformInfo::Concept { InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, + OperandValueInfo Op1Info, + OperandValueInfo Op2Info, const Instruction *I) override { - return Impl.getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind, I); + return Impl.getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind, + Op1Info, Op2Info, I); } InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h index 79c8bafbc6c0d..eca8818cc25e6 100644 --- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h @@ -666,6 +666,8 @@ class TargetTransformInfoImplBase { InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, + TTI::OperandValueInfo Op1Info, + TTI::OperandValueInfo Op2Info, const Instruction *I) const { return 1; } @@ -1332,19 +1334,23 @@ class TargetTransformInfoImplCRTPBase : public TargetTransformInfoImplBase { match(U, m_LogicalOr()) ? Instruction::Or : Instruction::And, Ty, CostKind, Op1Info, Op2Info, Operands, I); } + const auto Op1Info = TTI::getOperandInfo(Operands[1]); + const auto Op2Info = TTI::getOperandInfo(Operands[2]); Type *CondTy = Operands[0]->getType(); return TargetTTI->getCmpSelInstrCost(Opcode, U->getType(), CondTy, CmpInst::BAD_ICMP_PREDICATE, - CostKind, I); + CostKind, Op1Info, Op2Info, I); } case Instruction::ICmp: case Instruction::FCmp: { + const auto Op1Info = TTI::getOperandInfo(Operands[0]); + const auto Op2Info = TTI::getOperandInfo(Operands[1]); Type *ValTy = Operands[0]->getType(); // TODO: Also handle ICmp/FCmp constant expressions. return TargetTTI->getCmpSelInstrCost(Opcode, ValTy, U->getType(), I ? 
cast(I)->getPredicate() : CmpInst::BAD_ICMP_PREDICATE, - CostKind, I); + CostKind, Op1Info, Op2Info, I); } case Instruction::InsertElement: { auto *IE = dyn_cast(U); diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h index 7198e134a2d26..ed074ecaebcf5 100644 --- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h +++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h @@ -1222,10 +1222,12 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { return BaseT::getCFInstrCost(Opcode, CostKind, I); } - InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, - CmpInst::Predicate VecPred, - TTI::TargetCostKind CostKind, - const Instruction *I = nullptr) { + InstructionCost getCmpSelInstrCost( + unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, + TTI::TargetCostKind CostKind, + TTI::OperandValueInfo Op1Info = {TTI::OK_AnyValue, TTI::OP_None}, + TTI::OperandValueInfo Op2Info = {TTI::OK_AnyValue, TTI::OP_None}, + const Instruction *I = nullptr) { const TargetLoweringBase *TLI = getTLI(); int ISD = TLI->InstructionOpcodeToISD(Opcode); assert(ISD && "Invalid opcode"); @@ -1233,7 +1235,7 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { // TODO: Handle other cost kinds. if (CostKind != TTI::TCK_RecipThroughput) return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind, - I); + Op1Info, Op2Info, I); // Selects on vectors are actually vector selects. if (ISD == ISD::SELECT) { @@ -1260,8 +1262,9 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { unsigned Num = cast(ValVTy)->getNumElements(); if (CondTy) CondTy = CondTy->getScalarType(); - InstructionCost Cost = thisT()->getCmpSelInstrCost( - Opcode, ValVTy->getScalarType(), CondTy, VecPred, CostKind, I); + InstructionCost Cost = + thisT()->getCmpSelInstrCost(Opcode, ValVTy->getScalarType(), CondTy, + VecPred, CostKind, Op1Info, Op2Info, I); // Return the cost of multiple scalar invocation plus the cost of // inserting and extracting the values. 
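For reviewers, a hedged summary of what getOperandInfo (used in the CRTP path above) is expected to report; the kinds come from the existing OperandValueKind enum, and the mapping shown is the common case rather than an exhaustive specification:

#include "llvm/Analysis/TargetTransformInfo.h"

// Assumed classification (see TTI::getOperandInfo for authoritative rules):
//   plain SSA value           -> OK_AnyValue
//   scalar or splat constant  -> OK_UniformConstantValue
//   non-splat constant vector -> OK_NonUniformConstantValue
// A target hook can then gate extra costs on these kinds, e.g.:
static bool needsConstantPoolLoad(
    llvm::TargetTransformInfo::OperandValueInfo OpInfo) {
  return OpInfo.isConstant() && !OpInfo.isUniform();
}

This uniform/non-uniform constant distinction is exactly what the RISC-V change later in this patch keys on.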
diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp index 67b626f300a10..b5195f764cbd1 100644 --- a/llvm/lib/Analysis/TargetTransformInfo.cpp +++ b/llvm/lib/Analysis/TargetTransformInfo.cpp @@ -1015,11 +1015,12 @@ InstructionCost TargetTransformInfo::getCFInstrCost( InstructionCost TargetTransformInfo::getCmpSelInstrCost( unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, - TTI::TargetCostKind CostKind, const Instruction *I) const { + TTI::TargetCostKind CostKind, OperandValueInfo Op1Info, + OperandValueInfo Op2Info, const Instruction *I) const { assert((I == nullptr || I->getOpcode() == Opcode) && "Opcode should reflect passed instruction."); - InstructionCost Cost = - TTIImpl->getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind, I); + InstructionCost Cost = TTIImpl->getCmpSelInstrCost( + Opcode, ValTy, CondTy, VecPred, CostKind, Op1Info, Op2Info, I); assert(Cost >= 0 && "TTI should not produce negative costs!"); return Cost; } diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp index ac05a44abc2dd..7a07bb67e77de 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -3440,15 +3440,14 @@ InstructionCost AArch64TTIImpl::getAddressComputationCost(Type *Ty, return 1; } -InstructionCost AArch64TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, - Type *CondTy, - CmpInst::Predicate VecPred, - TTI::TargetCostKind CostKind, - const Instruction *I) { +InstructionCost AArch64TTIImpl::getCmpSelInstrCost( + unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, + TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info, + TTI::OperandValueInfo Op2Info, const Instruction *I) { // TODO: Handle other cost kinds. if (CostKind != TTI::TCK_RecipThroughput) return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind, - I); + Op1Info, Op2Info, I); int ISD = TLI->InstructionOpcodeToISD(Opcode); // We don't lower some vector selects well that are wider than the register @@ -3527,7 +3526,8 @@ InstructionCost AArch64TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, // The base case handles scalable vectors fine for now, since it treats the // cost as 1 * legalization cost. 
- return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind, I); + return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind, + Op1Info, Op2Info, I); } AArch64TTIImpl::TTI::MemCmpExpansionOptions diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h index 22bba21eedcc5..28e45207596ec 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h @@ -208,10 +208,12 @@ class AArch64TTIImpl : public BasicTTIImplBase { InstructionCost getAddressComputationCost(Type *Ty, ScalarEvolution *SE, const SCEV *Ptr); - InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, - CmpInst::Predicate VecPred, - TTI::TargetCostKind CostKind, - const Instruction *I = nullptr); + InstructionCost getCmpSelInstrCost( + unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, + TTI::TargetCostKind CostKind, + TTI::OperandValueInfo Op1Info = {TTI::OK_AnyValue, TTI::OP_None}, + TTI::OperandValueInfo Op2Info = {TTI::OK_AnyValue, TTI::OP_None}, + const Instruction *I = nullptr); TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const; diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp index 9b5349241c341..865e2f3066ef0 100644 --- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp +++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp @@ -934,11 +934,10 @@ InstructionCost ARMTTIImpl::getVectorInstrCost(unsigned Opcode, Type *ValTy, return BaseT::getVectorInstrCost(Opcode, ValTy, CostKind, Index, Op0, Op1); } -InstructionCost ARMTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, - Type *CondTy, - CmpInst::Predicate VecPred, - TTI::TargetCostKind CostKind, - const Instruction *I) { +InstructionCost ARMTTIImpl::getCmpSelInstrCost( + unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, + TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info, + TTI::OperandValueInfo Op2Info, const Instruction *I) { int ISD = TLI->InstructionOpcodeToISD(Opcode); // Thumb scalar code size cost for select. 
@@ -1052,7 +1051,7 @@ InstructionCost ARMTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, VecValTy->getNumElements() * getCmpSelInstrCost(Opcode, ValTy->getScalarType(), VecCondTy->getScalarType(), VecPred, - CostKind, I); + CostKind, Op1Info, Op2Info, I); } std::pair LT = getTypeLegalizationCost(ValTy); @@ -1077,8 +1076,8 @@ InstructionCost ARMTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, if (ST->hasMVEIntegerOps() && ValTy->isVectorTy()) BaseCost = ST->getMVEVectorCostFactor(CostKind); - return BaseCost * - BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind, I); + return BaseCost * BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, + CostKind, Op1Info, Op2Info, I); } InstructionCost ARMTTIImpl::getAddressComputationCost(Type *Ty, diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h index 528f082dde32c..7be53c4bcaa29 100644 --- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h +++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h @@ -239,10 +239,12 @@ class ARMTTIImpl : public BasicTTIImplBase { TTI::TargetCostKind CostKind, const Instruction *I = nullptr); - InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, - CmpInst::Predicate VecPred, - TTI::TargetCostKind CostKind, - const Instruction *I = nullptr); + InstructionCost getCmpSelInstrCost( + unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, + TTI::TargetCostKind CostKind, + TTI::OperandValueInfo Op1Info = {TTI::OK_AnyValue, TTI::OP_None}, + TTI::OperandValueInfo Op2Info = {TTI::OK_AnyValue, TTI::OP_None}, + const Instruction *I = nullptr); using BaseT::getVectorInstrCost; InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, diff --git a/llvm/lib/Target/BPF/BPFTargetTransformInfo.h b/llvm/lib/Target/BPF/BPFTargetTransformInfo.h index 9d0db33d9a1fd..bf0bef3a2b2f9 100644 --- a/llvm/lib/Target/BPF/BPFTargetTransformInfo.h +++ b/llvm/lib/Target/BPF/BPFTargetTransformInfo.h @@ -44,15 +44,17 @@ class BPFTTIImpl : public BasicTTIImplBase { return TTI::TCC_Basic; } - InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, - CmpInst::Predicate VecPred, - TTI::TargetCostKind CostKind, - const llvm::Instruction *I = nullptr) { + InstructionCost getCmpSelInstrCost( + unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, + TTI::TargetCostKind CostKind, + TTI::OperandValueInfo Op1Info = {TTI::OK_AnyValue, TTI::OP_None}, + TTI::OperandValueInfo Op2Info = {TTI::OK_AnyValue, TTI::OP_None}, + const llvm::Instruction *I = nullptr) { if (Opcode == Instruction::Select) return SCEVCheapExpansionBudget.getValue(); return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind, - I); + Op1Info, Op2Info, I); } InstructionCost getArithmeticInstrCost( diff --git a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp index f47fcff5d6025..bbb9d065b6243 100644 --- a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp +++ b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp @@ -255,11 +255,10 @@ InstructionCost HexagonTTIImpl::getInterleavedMemoryOpCost( CostKind); } -InstructionCost HexagonTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, - Type *CondTy, - CmpInst::Predicate VecPred, - TTI::TargetCostKind CostKind, - const Instruction *I) { +InstructionCost HexagonTTIImpl::getCmpSelInstrCost( + unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, + TTI::TargetCostKind CostKind, 
TTI::OperandValueInfo Op1Info, + TTI::OperandValueInfo Op2Info, const Instruction *I) { if (ValTy->isVectorTy() && CostKind == TTI::TCK_RecipThroughput) { if (!isHVXVectorType(ValTy) && ValTy->isFPOrFPVectorTy()) return InstructionCost::getMax(); @@ -267,7 +266,8 @@ InstructionCost HexagonTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, if (Opcode == Instruction::FCmp) return LT.first + FloatFactor * getTypeNumElements(ValTy); } - return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind, I); + return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind, + Op1Info, Op2Info, I); } InstructionCost HexagonTTIImpl::getArithmeticInstrCost( diff --git a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h index 4a1cfe03d48a7..826644d08d1ac 100644 --- a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h +++ b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h @@ -132,10 +132,12 @@ class HexagonTTIImpl : public BasicTTIImplBase { unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond = false, bool UseMaskForGaps = false); - InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, - CmpInst::Predicate VecPred, - TTI::TargetCostKind CostKind, - const Instruction *I = nullptr); + InstructionCost getCmpSelInstrCost( + unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, + TTI::TargetCostKind CostKind, + TTI::OperandValueInfo Op1Info = {TTI::OK_AnyValue, TTI::OP_None}, + TTI::OperandValueInfo Op2Info = {TTI::OK_AnyValue, TTI::OP_None}, + const Instruction *I = nullptr); InstructionCost getArithmeticInstrCost( unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info = {TTI::OK_AnyValue, TTI::OP_None}, diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp index b7bdbeb535d52..ec3d3dbc8f6aa 100644 --- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp +++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp @@ -655,18 +655,17 @@ InstructionCost PPCTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, return Cost; } -InstructionCost PPCTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, - Type *CondTy, - CmpInst::Predicate VecPred, - TTI::TargetCostKind CostKind, - const Instruction *I) { +InstructionCost PPCTTIImpl::getCmpSelInstrCost( + unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, + TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info, + TTI::OperandValueInfo Op2Info, const Instruction *I) { InstructionCost CostFactor = vectorCostAdjustmentFactor(Opcode, ValTy, nullptr); if (!CostFactor.isValid()) return InstructionCost::getMax(); - InstructionCost Cost = - BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind, I); + InstructionCost Cost = BaseT::getCmpSelInstrCost( + Opcode, ValTy, CondTy, VecPred, CostKind, Op1Info, Op2Info, I); // TODO: Handle other cost kinds. 
if (CostKind != TTI::TCK_RecipThroughput) return Cost; diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h index 126ccb2b3096e..3cb60d7a1785a 100644 --- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h +++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h @@ -118,10 +118,12 @@ class PPCTTIImpl : public BasicTTIImplBase<PPCTTIImpl> { const Instruction *I = nullptr); InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind, const Instruction *I = nullptr); - InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, - CmpInst::Predicate VecPred, - TTI::TargetCostKind CostKind, - const Instruction *I = nullptr); + InstructionCost getCmpSelInstrCost( + unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, + TTI::TargetCostKind CostKind, + TTI::OperandValueInfo Op1Info = {TTI::OK_AnyValue, TTI::OP_None}, + TTI::OperandValueInfo Op2Info = {TTI::OK_AnyValue, TTI::OP_None}, + const Instruction *I = nullptr); using BaseT::getVectorInstrCost; InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp index 3bef01da0a445..e041854ee8fd6 100644 --- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp @@ -1618,23 +1618,38 @@ InstructionCost RISCVTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, } -InstructionCost RISCVTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, - Type *CondTy, - CmpInst::Predicate VecPred, - TTI::TargetCostKind CostKind, - const Instruction *I) { +InstructionCost RISCVTTIImpl::getCmpSelInstrCost( + unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, + TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info, + TTI::OperandValueInfo Op2Info, const Instruction *I) { if (CostKind != TTI::TCK_RecipThroughput) return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind, - I); + Op1Info, Op2Info, I); if (isa<FixedVectorType>(ValTy) && !ST->useRVVForFixedLengthVectors()) return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind, - I); + Op1Info, Op2Info, I); // Skip if scalar size of ValTy is bigger than ELEN. if (ValTy->isVectorTy() && ValTy->getScalarSizeInBits() > ST->getELen()) return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind, - I); + Op1Info, Op2Info, I); + + auto GetConstantMatCost = + [&](TTI::OperandValueInfo OpInfo) -> InstructionCost { + if (OpInfo.isUniform()) + // We return 0 because we currently ignore the cost of materializing + // scalar constants in GPRs. 
+ return 0; + + return getConstantPoolLoadCost(ValTy, CostKind); + }; + + InstructionCost ConstantMatCost; + if (Op1Info.isConstant()) + ConstantMatCost += GetConstantMatCost(Op1Info); + if (Op2Info.isConstant()) + ConstantMatCost += GetConstantMatCost(Op2Info); std::pair LT = getTypeLegalizationCost(ValTy); if (Opcode == Instruction::Select && ValTy->isVectorTy()) { @@ -1643,14 +1658,16 @@ InstructionCost RISCVTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, // vmandn.mm v8, v8, v9 // vmand.mm v9, v0, v9 // vmor.mm v0, v9, v8 - return LT.first * - getRISCVInstructionCost( - {RISCV::VMANDN_MM, RISCV::VMAND_MM, RISCV::VMOR_MM}, - LT.second, CostKind); + return ConstantMatCost + + LT.first * + getRISCVInstructionCost( + {RISCV::VMANDN_MM, RISCV::VMAND_MM, RISCV::VMOR_MM}, + LT.second, CostKind); } // vselect and max/min are supported natively. - return LT.first * - getRISCVInstructionCost(RISCV::VMERGE_VVM, LT.second, CostKind); + return ConstantMatCost + + LT.first * getRISCVInstructionCost(RISCV::VMERGE_VVM, LT.second, + CostKind); } if (ValTy->getScalarSizeInBits() == 1) { @@ -1660,7 +1677,8 @@ InstructionCost RISCVTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, // vmand.mm v9, v0, v9 // vmor.mm v0, v9, v8 MVT InterimVT = LT.second.changeVectorElementType(MVT::i8); - return LT.first * + return ConstantMatCost + + LT.first * getRISCVInstructionCost({RISCV::VMV_V_X, RISCV::VMSNE_VI}, InterimVT, CostKind) + LT.first * getRISCVInstructionCost( @@ -1671,7 +1689,8 @@ InstructionCost RISCVTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, // vmv.v.x v10, a0 // vmsne.vi v0, v10, 0 // vmerge.vvm v8, v9, v8, v0 - return LT.first * getRISCVInstructionCost( + return ConstantMatCost + + LT.first * getRISCVInstructionCost( {RISCV::VMV_V_X, RISCV::VMSNE_VI, RISCV::VMERGE_VVM}, LT.second, CostKind); } @@ -1680,8 +1699,9 @@ InstructionCost RISCVTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, CmpInst::isIntPredicate(VecPred)) { // Use VMSLT_VV to represent VMSEQ, VMSNE, VMSLTU, VMSLEU, VMSLT, VMSLE // provided they incur the same cost across all implementations - return LT.first * - getRISCVInstructionCost(RISCV::VMSLT_VV, LT.second, CostKind); + return ConstantMatCost + LT.first * getRISCVInstructionCost(RISCV::VMSLT_VV, + LT.second, + CostKind); } if ((Opcode == Instruction::FCmp) && ValTy->isVectorTy() && @@ -1689,7 +1709,8 @@ InstructionCost RISCVTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, // Use VMXOR_MM and VMXNOR_MM to generate all true/false mask if ((VecPred == CmpInst::FCMP_FALSE) || (VecPred == CmpInst::FCMP_TRUE)) - return getRISCVInstructionCost(RISCV::VMXOR_MM, LT.second, CostKind); + return ConstantMatCost + + getRISCVInstructionCost(RISCV::VMXOR_MM, LT.second, CostKind); // If we do not support the input floating point vector type, use the base // one which will calculate as: @@ -1699,7 +1720,7 @@ InstructionCost RISCVTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, (ValTy->getScalarSizeInBits() == 32 && !ST->hasVInstructionsF32()) || (ValTy->getScalarSizeInBits() == 64 && !ST->hasVInstructionsF64())) return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind, - I); + Op1Info, Op2Info, I); // Assuming vector fp compare and mask instructions are all the same cost // until a need arises to differentiate them. 
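As a sanity check on the new ConstantMatCost term, a hedged reading of the rvv-select.ll deltas at the end of this patch; the per-operand pool-load cost of 3 below is inferred from those CHECK lines, not read out of the code:

#include "llvm/Support/InstructionCost.h"

// Hedged mirror of GetConstantMatCost above, with an assumed pool-load cost.
static llvm::InstructionCost constantMatCost(bool IsUniform) {
  return IsUniform ? llvm::InstructionCost(0) : llvm::InstructionCost(3);
}
// e.g. a <2 x i64> select of a non-splat constant against zeroinitializer:
// 3 (vmv.v.x + vmsne.vi + vmerge.vvm) + 3 (one pool load) = 6, and with two
// non-splat constant operands 3 + 3 + 3 = 9, matching the updated tests;
// a splat constant adds 0 since GPR materialization is ignored for now.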
@@ -1708,7 +1729,8 @@ InstructionCost RISCVTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, case CmpInst::FCMP_ORD: // vmfeq.vv + vmfeq.vv + vmand.mm case CmpInst::FCMP_UNO: // vmfne.vv + vmfne.vv + vmor.mm case CmpInst::FCMP_UEQ: // vmflt.vv + vmflt.vv + vmnor.mm - return LT.first * getRISCVInstructionCost( + return ConstantMatCost + + LT.first * getRISCVInstructionCost( {RISCV::VMFLT_VV, RISCV::VMFLT_VV, RISCV::VMOR_MM}, LT.second, CostKind); @@ -1716,9 +1738,10 @@ InstructionCost RISCVTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, case CmpInst::FCMP_UGE: // vmflt.vv + vmnot.m case CmpInst::FCMP_ULT: // vmfle.vv + vmnot.m case CmpInst::FCMP_ULE: // vmflt.vv + vmnot.m - return LT.first * - getRISCVInstructionCost({RISCV::VMFLT_VV, RISCV::VMNAND_MM}, - LT.second, CostKind); + return ConstantMatCost + + LT.first * + getRISCVInstructionCost({RISCV::VMFLT_VV, RISCV::VMNAND_MM}, + LT.second, CostKind); case CmpInst::FCMP_OEQ: // vmfeq.vv case CmpInst::FCMP_OGT: // vmflt.vv @@ -1726,8 +1749,9 @@ InstructionCost RISCVTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, case CmpInst::FCMP_OLT: // vmflt.vv case CmpInst::FCMP_OLE: // vmfle.vv case CmpInst::FCMP_UNE: // vmfne.vv - return LT.first * - getRISCVInstructionCost(RISCV::VMFLT_VV, LT.second, CostKind); + return ConstantMatCost + + LT.first * + getRISCVInstructionCost(RISCV::VMFLT_VV, LT.second, CostKind); default: break; } @@ -1750,7 +1774,8 @@ InstructionCost RISCVTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, // TODO: Add cost for scalar type. - return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind, I); + return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind, + Op1Info, Op2Info, I); } InstructionCost RISCVTTIImpl::getCFInstrCost(unsigned Opcode, diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h index f16c4fc0eed02..65bbd90550855 100644 --- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h +++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h @@ -200,10 +200,12 @@ class RISCVTTIImpl : public BasicTTIImplBase { TTI::OperandValueInfo OpdInfo = {TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I = nullptr); - InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, - CmpInst::Predicate VecPred, - TTI::TargetCostKind CostKind, - const Instruction *I = nullptr); + InstructionCost getCmpSelInstrCost( + unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, + TTI::TargetCostKind CostKind, + TTI::OperandValueInfo Op1Info = {TTI::OK_AnyValue, TTI::OP_None}, + TTI::OperandValueInfo Op2Info = {TTI::OK_AnyValue, TTI::OP_None}, + const Instruction *I = nullptr); InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind, const Instruction *I = nullptr); diff --git a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp index 3cd1e05aa5d18..e44777c5c4857 100644 --- a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp +++ b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp @@ -959,13 +959,13 @@ static unsigned getOperandsExtensionCost(const Instruction *I) { return ExtCost; } -InstructionCost SystemZTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, - Type *CondTy, - CmpInst::Predicate VecPred, - TTI::TargetCostKind CostKind, - const Instruction *I) { +InstructionCost SystemZTTIImpl::getCmpSelInstrCost( + unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, + 
TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info, + TTI::OperandValueInfo Op2Info, const Instruction *I) { if (CostKind != TTI::TCK_RecipThroughput) - return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind); + return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind, + Op1Info, Op2Info); if (!ValTy->isVectorTy()) { switch (Opcode) { @@ -1041,7 +1041,8 @@ InstructionCost SystemZTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, } } - return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind); + return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind, + Op1Info, Op2Info); } InstructionCost SystemZTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val, diff --git a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h index 9294fada1eb77..e221200cfa08c 100644 --- a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h +++ b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h @@ -104,10 +104,12 @@ class SystemZTTIImpl : public BasicTTIImplBase { TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I = nullptr); - InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, - CmpInst::Predicate VecPred, - TTI::TargetCostKind CostKind, - const Instruction *I = nullptr); + InstructionCost getCmpSelInstrCost( + unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, + TTI::TargetCostKind CostKind, + TTI::OperandValueInfo Op1Info = {TTI::OK_AnyValue, TTI::OP_None}, + TTI::OperandValueInfo Op2Info = {TTI::OK_AnyValue, TTI::OP_None}, + const Instruction *I = nullptr); using BaseT::getVectorInstrCost; InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp index 0fa138cefc3b8..46bc73c5e928e 100644 --- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp @@ -3157,15 +3157,14 @@ InstructionCost X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I)); } -InstructionCost X86TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, - Type *CondTy, - CmpInst::Predicate VecPred, - TTI::TargetCostKind CostKind, - const Instruction *I) { +InstructionCost X86TTIImpl::getCmpSelInstrCost( + unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, + TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info, + TTI::OperandValueInfo Op2Info, const Instruction *I) { // Early out if this type isn't scalar/vector integer/float. if (!(ValTy->isIntOrIntVectorTy() || ValTy->isFPOrFPVectorTy())) return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind, - I); + Op1Info, Op2Info, I); // Legalize the type. std::pair LT = getTypeLegalizationCost(ValTy); @@ -3229,9 +3228,11 @@ InstructionCost X86TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, // Use FCMP_UEQ expansion - FCMP_ONE should be the same. 
if (CondTy && !ST->hasAVX()) return getCmpSelInstrCost(Opcode, ValTy, CondTy, - CmpInst::Predicate::FCMP_UNO, CostKind) + + CmpInst::Predicate::FCMP_UNO, CostKind, + Op1Info, Op2Info) + getCmpSelInstrCost(Opcode, ValTy, CondTy, - CmpInst::Predicate::FCMP_OEQ, CostKind) + + CmpInst::Predicate::FCMP_OEQ, CostKind, + Op1Info, Op2Info) + getArithmeticInstrCost(Instruction::Or, CondTy, CostKind); break; @@ -3451,7 +3452,8 @@ InstructionCost X86TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, if (ValTy->getScalarType()->isFloatingPointTy()) return 3; - return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind, I); + return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind, + Op1Info, Op2Info, I); } unsigned X86TTIImpl::getAtomicMemIntrinsicMaxElementSize() const { return 16; } diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.h b/llvm/lib/Target/X86/X86TargetTransformInfo.h index 8ea67dcbe5166..c16461b157e07 100644 --- a/llvm/lib/Target/X86/X86TargetTransformInfo.h +++ b/llvm/lib/Target/X86/X86TargetTransformInfo.h @@ -156,10 +156,12 @@ class X86TTIImpl : public BasicTTIImplBase { TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I = nullptr); - InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, - CmpInst::Predicate VecPred, - TTI::TargetCostKind CostKind, - const Instruction *I = nullptr); + InstructionCost getCmpSelInstrCost( + unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, + TTI::TargetCostKind CostKind, + TTI::OperandValueInfo Op1Info = {TTI::OK_AnyValue, TTI::OP_None}, + TTI::OperandValueInfo Op2Info = {TTI::OK_AnyValue, TTI::OP_None}, + const Instruction *I = nullptr); using BaseT::getVectorInstrCost; InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 6298c54c99459..cac0b57fc6964 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -6587,7 +6587,8 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, if (auto *Cmp = dyn_cast(SI->getCondition())) Pred = Cmp->getPredicate(); return TTI.getCmpSelInstrCost(I->getOpcode(), VectorTy, CondTy, Pred, - CostKind, I); + CostKind, {TTI::OK_AnyValue, TTI::OP_None}, + {TTI::OK_AnyValue, TTI::OP_None}, I); } case Instruction::ICmp: case Instruction::FCmp: { @@ -6606,7 +6607,8 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, VectorTy = ToVectorTy(ValTy, VF); return TTI.getCmpSelInstrCost(I->getOpcode(), VectorTy, nullptr, cast(I)->getPredicate(), CostKind, - I); + {TTI::OK_AnyValue, TTI::OP_None}, + {TTI::OK_AnyValue, TTI::OP_None}, I); } case Instruction::Store: case Instruction::Load: { diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 3695a8082531c..1dc749f2f13c7 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -10358,9 +10358,10 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef VectorizedVals, if (VI && SelectOnly) { assert(!Ty->isVectorTy() && "Expected only for scalar type."); auto *CI = cast(VI->getOperand(0)); - IntrinsicCost -= - TTI->getCmpSelInstrCost(CI->getOpcode(), Ty, Builder.getInt1Ty(), - CI->getPredicate(), CostKind, CI); + IntrinsicCost -= TTI->getCmpSelInstrCost( + CI->getOpcode(), Ty, Builder.getInt1Ty(), CI->getPredicate(), 
+ CostKind, {TTI::OK_AnyValue, TTI::OP_None}, + {TTI::OK_AnyValue, TTI::OP_None}, CI); } return IntrinsicCost; }; @@ -10624,7 +10625,8 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef VectorizedVals, InstructionCost ScalarCost = TTI->getCmpSelInstrCost( E->getOpcode(), OrigScalarTy, Builder.getInt1Ty(), CurrentPred, - CostKind, VI); + CostKind, {TTI::OK_AnyValue, TTI::OP_None}, + {TTI::OK_AnyValue, TTI::OP_None}, VI); InstructionCost IntrinsicCost = GetMinMaxCost(OrigScalarTy, VI); if (IntrinsicCost.isValid()) ScalarCost = IntrinsicCost; @@ -10634,8 +10636,10 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef VectorizedVals, auto GetVectorCost = [&](InstructionCost CommonCost) { auto *MaskTy = getWidenedType(Builder.getInt1Ty(), VL.size()); - InstructionCost VecCost = TTI->getCmpSelInstrCost( - E->getOpcode(), VecTy, MaskTy, VecPred, CostKind, VL0); + InstructionCost VecCost = + TTI->getCmpSelInstrCost(E->getOpcode(), VecTy, MaskTy, VecPred, + CostKind, {TTI::OK_AnyValue, TTI::OP_None}, + {TTI::OK_AnyValue, TTI::OP_None}, VL0); if (auto *SI = dyn_cast(VL0)) { auto *CondType = getWidenedType(SI->getCondition()->getType(), VL.size()); @@ -10875,11 +10879,14 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef VectorizedVals, TTIRef.getArithmeticInstrCost(E->getAltOpcode(), VecTy, CostKind); } else if (auto *CI0 = dyn_cast(VL0)) { auto *MaskTy = getWidenedType(Builder.getInt1Ty(), VL.size()); - VecCost = TTIRef.getCmpSelInstrCost(E->getOpcode(), VecTy, MaskTy, - CI0->getPredicate(), CostKind, VL0); + VecCost = TTIRef.getCmpSelInstrCost( + E->getOpcode(), VecTy, MaskTy, CI0->getPredicate(), CostKind, + {TTI::OK_AnyValue, TTI::OP_None}, {TTI::OK_AnyValue, TTI::OP_None}, + VL0); VecCost += TTIRef.getCmpSelInstrCost( E->getOpcode(), VecTy, MaskTy, cast(E->getAltOp())->getPredicate(), CostKind, + {TTI::OK_AnyValue, TTI::OP_None}, {TTI::OK_AnyValue, TTI::OP_None}, E->getAltOp()); } else { Type *SrcSclTy = E->getMainOp()->getOperand(0)->getType(); diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index 3f5b73d2d43c3..5d1a13086e9f9 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -1259,7 +1259,9 @@ InstructionCost VPWidenRecipe::computeCost(ElementCount VF, Instruction *CtxI = dyn_cast_or_null(getUnderlyingValue()); Type *VectorTy = ToVectorTy(Ctx.Types.inferScalarType(getOperand(0)), VF); return Ctx.TTI.getCmpSelInstrCost(Opcode, VectorTy, nullptr, getPredicate(), - CostKind, CtxI); + CostKind, + {TTI::OK_AnyValue, TTI::OP_None}, + {TTI::OK_AnyValue, TTI::OP_None}, CtxI); } default: llvm_unreachable("Unsupported opcode for instruction"); diff --git a/llvm/test/Analysis/CostModel/RISCV/rvv-select.ll b/llvm/test/Analysis/CostModel/RISCV/rvv-select.ll index 9eadcaca6bb55..2bf1e5d26e2da 100644 --- a/llvm/test/Analysis/CostModel/RISCV/rvv-select.ll +++ b/llvm/test/Analysis/CostModel/RISCV/rvv-select.ll @@ -394,10 +394,10 @@ define void @select() { define void @select_of_constants() { ; CHECK-LABEL: 'select_of_constants' ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %1 = select i1 undef, <2 x i64> , <2 x i64> zeroinitializer -; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %2 = select i1 undef, <2 x i64> , <2 x i64> zeroinitializer -; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %3 = select i1 undef, <2 x i64> , <2 x i64> zeroinitializer -; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for 
instruction: %4 = select i1 undef, <2 x i64> , <2 x i64> -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %5 = select <4 x i1> undef, <4 x i32> , <4 x i32> zeroinitializer +; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %2 = select i1 undef, <2 x i64> , <2 x i64> zeroinitializer +; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %3 = select i1 undef, <2 x i64> , <2 x i64> zeroinitializer +; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %4 = select i1 undef, <2 x i64> , <2 x i64> +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %5 = select <4 x i1> undef, <4 x i32> , <4 x i32> zeroinitializer ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; Splat constants From 26e0b5077236064d9ab0548e049dffce4d476c06 Mon Sep 17 00:00:00 2001 From: David Spickett Date: Wed, 25 Sep 2024 14:21:48 +0000 Subject: [PATCH 174/212] [lldb][lldb-dap] Fix compilation error on 32 bit platforms https://github.com/llvm/llvm-project/pull/109485 tried to std::min between size_t and uint64_t. size_t on 32 bit is 32 bits. https://lab.llvm.org/buildbot/#/builders/18/builds/4430/steps/4/logs/stdio Explicitly select the size_t template to fix this. This will truncate one of the arguments but that's the count_requested. If you're debugging from a 32 bit host and you asked it to read > 32 bit range of memory from a 64 bit target, you weren't going to have any success anyway. The final result needs to be size_t to resize the vector with. --- lldb/tools/lldb-dap/lldb-dap.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lldb/tools/lldb-dap/lldb-dap.cpp b/lldb/tools/lldb-dap/lldb-dap.cpp index f692d77347038..db4dbbd6f6200 100644 --- a/lldb/tools/lldb-dap/lldb-dap.cpp +++ b/lldb/tools/lldb-dap/lldb-dap.cpp @@ -4451,7 +4451,7 @@ void request_readMemory(const llvm::json::Object &request) { g_dap.SendJSON(llvm::json::Value(std::move(response))); return; } - buf.resize(std::min(count_result, count_requested)); + buf.resize(std::min<size_t>(count_result, count_requested)); llvm::json::Object body; std::string formatted_addr = "0x" + llvm::utohexstr(addr_int); From ac802a3148cc6df21f6ea3f9942a90499388fa25 Mon Sep 17 00:00:00 2001 From: Shourya Goel Date: Wed, 25 Sep 2024 20:05:20 +0530 Subject: [PATCH 175/212] [libc][math] Implement issignaling macro. 
(#109615) #109201 --- .../llvm-libc-macros/math-function-macros.h | 4 ++ libc/test/include/CMakeLists.txt | 45 +++++++++++++++++ libc/test/include/IsSignalingTest.h | 49 +++++++++++++++++++ libc/test/include/issignaling_test.c | 24 +++++++++ libc/test/include/issignaling_test.cpp | 18 +++++++ libc/test/include/issignalingf_test.cpp | 18 +++++++ libc/test/include/issignalingl_test.cpp | 18 +++++++ 7 files changed, 176 insertions(+) create mode 100644 libc/test/include/IsSignalingTest.h create mode 100644 libc/test/include/issignaling_test.c create mode 100644 libc/test/include/issignaling_test.cpp create mode 100644 libc/test/include/issignalingf_test.cpp create mode 100644 libc/test/include/issignalingl_test.cpp diff --git a/libc/include/llvm-libc-macros/math-function-macros.h b/libc/include/llvm-libc-macros/math-function-macros.h index 68f9ff9d1c033..c740eb2d18825 100644 --- a/libc/include/llvm-libc-macros/math-function-macros.h +++ b/libc/include/llvm-libc-macros/math-function-macros.h @@ -20,5 +20,9 @@ __builtin_fpclassify(FP_NAN, FP_INFINITE, FP_NORMAL, FP_SUBNORMAL, FP_ZERO, x) #define isnormal(x) __builtin_isnormal(x) #define issubnormal(x) (fpclassify(x) == FP_SUBNORMAL) +#if (defined(__clang__) && __clang_major__ >= 18) || \ + (defined(__GNUC__) && __GNUC__ >= 13) +#define issignaling(x) __builtin_issignaling(x) +#endif #endif // LLVM_LIBC_MACROS_MATH_FUNCTION_MACROS_H diff --git a/libc/test/include/CMakeLists.txt b/libc/test/include/CMakeLists.txt index 12692eed417c4..dd8f21bdd07ae 100644 --- a/libc/test/include/CMakeLists.txt +++ b/libc/test/include/CMakeLists.txt @@ -81,6 +81,36 @@ add_libc_test( libc.include.llvm-libc-macros.stdckdint_macros ) +add_libc_test( + issignaling_test + SUITE + libc_include_tests + SRCS + issignaling_test.cpp + DEPENDS + libc.include.llvm-libc-macros.math_function_macros +) + +add_libc_test( + issignalingf_test + SUITE + libc_include_tests + SRCS + issignalingf_test.cpp + DEPENDS + libc.include.llvm-libc-macros.math_function_macros +) + +add_libc_test( + issignalingl_test + SUITE + libc_include_tests + SRCS + issignalingl_test.cpp + DEPENDS + libc.include.llvm-libc-macros.math_function_macros +) + add_libc_test( issubnormal_test SUITE @@ -366,6 +396,21 @@ add_libc_test( libc.include.llvm-libc-macros.math_function_macros ) +add_libc_test( + issignaling_c_test + C_TEST + UNIT_TEST_ONLY + SUITE + libc_include_tests + SRCS + issignaling_test.c + COMPILE_OPTIONS + -Wall + -Werror + DEPENDS + libc.include.llvm-libc-macros.math_function_macros +) + add_libc_test( isinf_c_test C_TEST diff --git a/libc/test/include/IsSignalingTest.h b/libc/test/include/IsSignalingTest.h new file mode 100644 index 0000000000000..c369cfe090ed3 --- /dev/null +++ b/libc/test/include/IsSignalingTest.h @@ -0,0 +1,49 @@ +//===-- Utility class to test the issignaling macro ------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_TEST_INCLUDE_MATH_ISSIGNALING_H +#define LLVM_LIBC_TEST_INCLUDE_MATH_ISSIGNALING_H + +#include "test/UnitTest/FPMatcher.h" +#include "test/UnitTest/Test.h" + +#include "include/llvm-libc-macros/math-function-macros.h" + +template <typename T> +class IsSignalingTest : public LIBC_NAMESPACE::testing::Test { + DECLARE_SPECIAL_CONSTANTS(T) + +public: + typedef int (*IsSignalingFunc)(T); + + void testSpecialNumbers(IsSignalingFunc func) { + EXPECT_EQ(func(aNaN), 0); + EXPECT_EQ(func(neg_aNaN), 0); + EXPECT_EQ(func(sNaN), 1); + EXPECT_EQ(func(neg_sNaN), 1); + EXPECT_EQ(func(inf), 0); + EXPECT_EQ(func(neg_inf), 0); + EXPECT_EQ(func(min_normal), 0); + EXPECT_EQ(func(max_normal), 0); + EXPECT_EQ(func(neg_max_normal), 0); + EXPECT_EQ(func(min_denormal), 0); + EXPECT_EQ(func(neg_min_denormal), 0); + EXPECT_EQ(func(max_denormal), 0); + EXPECT_EQ(func(zero), 0); + EXPECT_EQ(func(neg_zero), 0); + } +}; + +#define LIST_ISSIGNALING_TESTS(T, func) \ + using LlvmLibcIsSignalingTest = IsSignalingTest<T>; \ + TEST_F(LlvmLibcIsSignalingTest, SpecialNumbers) { \ + auto issignaling_func = [](T x) { return func(x); }; \ + testSpecialNumbers(issignaling_func); \ + } + +#endif // LLVM_LIBC_TEST_INCLUDE_MATH_ISSIGNALING_H diff --git a/libc/test/include/issignaling_test.c b/libc/test/include/issignaling_test.c new file mode 100644 index 0000000000000..2c080696404ae --- /dev/null +++ b/libc/test/include/issignaling_test.c @@ -0,0 +1,24 @@ +//===-- Unittests for issignaling macro -----------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +#include "include/llvm-libc-macros/math-function-macros.h" + +#include <assert.h> + +// TODO: enable the test unconditionally when issignaling macro is fixed for +// older compiler +int main(void) { +#ifdef issignaling + assert(issignaling(__builtin_nans("")) == 1); + assert(issignaling(__builtin_nansf("")) == 1); + assert(issignaling(__builtin_nansl("")) == 1); + assert(issignaling(1.819f) == 0); + assert(issignaling(-1.726) == 0); + assert(issignaling(1.426L) == 0); +#endif + return 0; +} diff --git a/libc/test/include/issignaling_test.cpp b/libc/test/include/issignaling_test.cpp new file mode 100644 index 0000000000000..ef007feb0a633 --- /dev/null +++ b/libc/test/include/issignaling_test.cpp @@ -0,0 +1,18 @@ +//===-- Unittest for issignaling[d] macro ---------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "IsSignalingTest.h" +#include "include/llvm-libc-macros/math-function-macros.h" + +// TODO: enable the test unconditionally when issignaling macro is fixed for +// older compiler +#ifdef issignaling +LIST_ISSIGNALING_TESTS(double, issignaling) +#else +int main() { return 0; } +#endif diff --git a/libc/test/include/issignalingf_test.cpp b/libc/test/include/issignalingf_test.cpp new file mode 100644 index 0000000000000..9b236f2bb84d7 --- /dev/null +++ b/libc/test/include/issignalingf_test.cpp @@ -0,0 +1,18 @@ +//===-- Unittest for issignaling[f] macro ---------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "IsSignalingTest.h" +#include "include/llvm-libc-macros/math-function-macros.h" + +// TODO: enable the test unconditionally when issignaling macro is fixed for +// older compiler +#ifdef issignaling +LIST_ISSIGNALING_TESTS(float, issignaling) +#else +int main() { return 0; } +#endif diff --git a/libc/test/include/issignalingl_test.cpp b/libc/test/include/issignalingl_test.cpp new file mode 100644 index 0000000000000..35482cb4b0202 --- /dev/null +++ b/libc/test/include/issignalingl_test.cpp @@ -0,0 +1,18 @@ +//===-- Unittest for issignaling[l] macro ---------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "IsSignalingTest.h" +#include "include/llvm-libc-macros/math-function-macros.h" + +// TODO: enable the test unconditionally when issignaling macro is fixed for +// older compiler +#ifdef issignaling +LIST_ISSIGNALING_TESTS(long double, issignaling) +#else +int main() { return 0; } +#endif From a514457e62e96a13fc69343e058658f37bff9641 Mon Sep 17 00:00:00 2001 From: Sean Perry Date: Wed, 25 Sep 2024 10:43:02 -0400 Subject: [PATCH 176/212] Mark tests as unsupported when targeting z/OS (#107916) Set up these tests so they are marked as unsupported when targeting z/OS. Most would already be unsupported if you ran lit on z/OS. However, they also need to be unsupported if the default triple is z/OS. --- clang/test/Analysis/ctu-on-demand-parsing.c | 1 + clang/test/Analysis/ctu-on-demand-parsing.cpp | 1 + clang/test/CodeGenCXX/pr59765-modules-global-ctor-dtor.cppm | 5 +++-- clang/test/Driver/hipstdpar.c | 1 + clang/test/Driver/lld-repro.c | 2 +- clang/test/OpenMP/lit.local.cfg | 4 ++++ 6 files changed, 11 insertions(+), 3 deletions(-) diff --git a/clang/test/Analysis/ctu-on-demand-parsing.c b/clang/test/Analysis/ctu-on-demand-parsing.c index 72288def61b13..17ade150ded5e 100644 --- a/clang/test/Analysis/ctu-on-demand-parsing.c +++ b/clang/test/Analysis/ctu-on-demand-parsing.c @@ -24,6 +24,7 @@ // // FIXME: Path handling should work on all platforms. 
// REQUIRES: system-linux +// UNSUPPORTED: target={{.*}}-zos{{.*}} void clang_analyzer_eval(int); diff --git a/clang/test/Analysis/ctu-on-demand-parsing.cpp b/clang/test/Analysis/ctu-on-demand-parsing.cpp index d28d3c22c69b0..0c0128faefaea 100644 --- a/clang/test/Analysis/ctu-on-demand-parsing.cpp +++ b/clang/test/Analysis/ctu-on-demand-parsing.cpp @@ -35,6 +35,7 @@ // // FIXME: Path handling should work on all platforms. // REQUIRES: system-linux +// UNSUPPORTED: target={{.*}}-zos{{.*}} #include "ctu-hdr.h" diff --git a/clang/test/CodeGenCXX/pr59765-modules-global-ctor-dtor.cppm b/clang/test/CodeGenCXX/pr59765-modules-global-ctor-dtor.cppm index 9956348f87ff4..ad5a3e14a81db 100644 --- a/clang/test/CodeGenCXX/pr59765-modules-global-ctor-dtor.cppm +++ b/clang/test/CodeGenCXX/pr59765-modules-global-ctor-dtor.cppm @@ -1,9 +1,10 @@ // https://github.com/llvm/llvm-project/issues/59765 // FIXME: Since the signature of the constructors/destructors is // different in different targets. The current CHECK can't work -// well when targeting or running on AIX and z/OS. +// well when targeting AIX and z/OS. // It would be better to add the corresponding test for other test. -// UNSUPPORTED: system-zos, system-aix +// UNSUPPORTED: system-aix +// UNSUPPORTED: target={{.*}}-zos{{.*}} // // RUN: rm -rf %t // RUN: mkdir %t diff --git a/clang/test/Driver/hipstdpar.c b/clang/test/Driver/hipstdpar.c index 2f48bf6b5cf1e..32e040ef70d75 100644 --- a/clang/test/Driver/hipstdpar.c +++ b/clang/test/Driver/hipstdpar.c @@ -1,6 +1,7 @@ // REQUIRES: x86-registered-target // REQUIRES: amdgpu-registered-target // REQUIRES: system-linux +// UNSUPPORTED: target={{.*}}-zos{{.*}} // XFAIL: target={{.*}}hexagon{{.*}} // XFAIL: target={{.*}}-scei{{.*}} // XFAIL: target={{.*}}-sie{{.*}} diff --git a/clang/test/Driver/lld-repro.c b/clang/test/Driver/lld-repro.c index 61904c0e6df30..0e6340865b738 100644 --- a/clang/test/Driver/lld-repro.c +++ b/clang/test/Driver/lld-repro.c @@ -1,5 +1,5 @@ // REQUIRES: lld -// UNSUPPORTED: target={{.*-(ps4|ps5)}} +// UNSUPPORTED: target={{.*-(ps4|ps5)}}, target={{.*}}-zos{{.*}} // RUN: echo "-nostartfiles -nostdlib -fuse-ld=lld -gen-reproducer=error -fcrash-diagnostics-dir=%t" \ // RUN: | sed -e 's/\\/\\\\/g' > %t.rsp diff --git a/clang/test/OpenMP/lit.local.cfg b/clang/test/OpenMP/lit.local.cfg index 58ee923cb7ec5..93adc6734d1a2 100644 --- a/clang/test/OpenMP/lit.local.cfg +++ b/clang/test/OpenMP/lit.local.cfg @@ -1,5 +1,9 @@ # -*- Python -*- vim: set ft=python ts=4 sw=4 expandtab tw=79: +import re from lit.llvm.subst import ToolSubst fc = ToolSubst("FileCheck", unresolved="fatal") config.substitutions.insert(0, (fc.regex, "FileCheck --allow-unused-prefixes")) + +if re.match(r".*-zos", config.target_triple): + config.unsupported = True From 6b109a34ccedd3c75a067e322da0386c156c241d Mon Sep 17 00:00:00 2001 From: Alexey Bataev Date: Wed, 25 Sep 2024 10:43:27 -0400 Subject: [PATCH 177/212] [SLP]Initial support for non-power-of-2 (but still whole register) number of elements in operands. This patch adds basic support for a non-power-of-2 number of elements in operands. It still requires that this number of elements covers whole registers. 
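For intuition, the acceptance check this patch introduces can be restated as follows; this is a hedged simplification of hasFullVectorsOrPowerOf2 from the diff below, with example numbers assumed for a target whose <6 x i64> legalizes into three <2 x i64> registers:

#include "llvm/ADT/bit.h"

// Sz is the scalar count, NumParts the number of registers the widened type
// splits into. 6 lanes / 3 parts: 6 % 3 == 0 and 6/3 == 2 is a power of two,
// so 6 is now accepted; 7 lanes (4 parts, 7 % 4 != 0) are still rejected.
static bool formsWholeRegisters(unsigned Sz, unsigned NumParts) {
  return NumParts > 0 && NumParts < Sz &&
         llvm::has_single_bit(Sz / NumParts) && Sz % NumParts == 0;
}

The updated reduction-whole-regs-loads.ll test at the end of the patch shows the effect: the generic runs now emit a single <6 x i64> load, while the RISC-V run keeps the old 4 + 2 split.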
Reviewers: RKSimon, preames Reviewed By: preames Pull Request: https://github.com/llvm/llvm-project/pull/107273 --- llvm/include/llvm/CodeGen/BasicTTIImpl.h | 14 ++- .../Transforms/Vectorize/SLPVectorizer.cpp | 90 ++++++++++++++----- .../reduction-whole-regs-loads.ll | 28 +++--- 3 files changed, 98 insertions(+), 34 deletions(-) diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h index ed074ecaebcf5..cb62c86b502c1 100644 --- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h +++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h @@ -2538,7 +2538,19 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> { unsigned getNumberOfParts(Type *Tp) { std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Tp); - return LT.first.isValid() ? *LT.first.getValue() : 0; + if (!LT.first.isValid()) + return 0; + // Try to find actual number of parts for non-power-of-2 elements as + // ceil(num-of-elements/num-of-subtype-elements). + if (auto *FTp = dyn_cast<FixedVectorType>(Tp); + Tp && LT.second.isFixedLengthVector() && + !has_single_bit(FTp->getNumElements())) { + if (auto *SubTp = dyn_cast_if_present<FixedVectorType>( + EVT(LT.second).getTypeForEVT(Tp->getContext())); + SubTp && SubTp->getElementType() == FTp->getElementType()) + return divideCeil(FTp->getNumElements(), SubTp->getNumElements()); + } + return *LT.first.getValue(); } InstructionCost getAddressComputationCost(Type *Ty, ScalarEvolution *, diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 1dc749f2f13c7..154fed4a8ad2e 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -260,6 +260,20 @@ static FixedVectorType *getWidenedType(Type *ScalarTy, unsigned VF) { VF * getNumElements(ScalarTy)); } +/// Returns the number of elements of the given type \p Ty, not less than \p Sz, +/// that forms a type which \p TTI splits into whole vector types during +/// legalization. +static unsigned getFullVectorNumberOfElements(const TargetTransformInfo &TTI, + Type *Ty, unsigned Sz) { + if (!isValidElementType(Ty)) + return bit_ceil(Sz); + // Find the number of elements which form full vectors. + const unsigned NumParts = TTI.getNumberOfParts(getWidenedType(Ty, Sz)); + if (NumParts == 0 || NumParts >= Sz) + return bit_ceil(Sz); + return bit_ceil(divideCeil(Sz, NumParts)) * NumParts; +} + static void transformScalarShuffleIndiciesToVector(unsigned VecTyNumElements, SmallVectorImpl<int> &Mask) { // The ShuffleBuilder implementation uses shufflevector to splat an "element". @@ -394,7 +408,7 @@ static bool isVectorLikeInstWithConstOps(Value *V) { /// total number of elements \p Size and number of registers (parts) \p /// NumParts. static unsigned getPartNumElems(unsigned Size, unsigned NumParts) { - return PowerOf2Ceil(divideCeil(Size, NumParts)); + return std::min(Size, bit_ceil(divideCeil(Size, NumParts))); } /// Returns correct remaining number of elements, considering total amount \p @@ -1222,6 +1236,22 @@ static bool doesNotNeedToSchedule(ArrayRef<Value *> VL) { (all_of(VL, isUsedOutsideBlock) || all_of(VL, areAllOperandsNonInsts)); } +/// Returns true if the widened type of \p Ty elements with size \p Sz represents +/// a full vector type, i.e. adding an extra element results in extra parts upon +/// type legalization. 
+static bool hasFullVectorsOrPowerOf2(const TargetTransformInfo &TTI, Type *Ty, + unsigned Sz) { + if (Sz <= 1) + return false; + if (!isValidElementType(Ty) && !isa(Ty)) + return false; + if (has_single_bit(Sz)) + return true; + const unsigned NumParts = TTI.getNumberOfParts(getWidenedType(Ty, Sz)); + return NumParts > 0 && NumParts < Sz && has_single_bit(Sz / NumParts) && + Sz % NumParts == 0; +} + namespace slpvectorizer { /// Bottom Up SLP Vectorizer. @@ -3311,6 +3341,15 @@ class BoUpSLP { /// Return true if this is a non-power-of-2 node. bool isNonPowOf2Vec() const { bool IsNonPowerOf2 = !has_single_bit(Scalars.size()); + return IsNonPowerOf2; + } + + /// Return true if this is a node, which tries to vectorize number of + /// elements, forming whole vectors. + bool + hasNonWholeRegisterOrNonPowerOf2Vec(const TargetTransformInfo &TTI) const { + bool IsNonPowerOf2 = !hasFullVectorsOrPowerOf2( + TTI, getValueType(Scalars.front()), Scalars.size()); assert((!IsNonPowerOf2 || ReuseShuffleIndices.empty()) && "Reshuffling not supported with non-power-of-2 vectors yet."); return IsNonPowerOf2; @@ -3430,8 +3469,10 @@ class BoUpSLP { Last->State = EntryState; // FIXME: Remove once support for ReuseShuffleIndices has been implemented // for non-power-of-two vectors. - assert((has_single_bit(VL.size()) || ReuseShuffleIndices.empty()) && - "Reshuffling scalars not yet supported for nodes with padding"); + assert( + (hasFullVectorsOrPowerOf2(*TTI, getValueType(VL.front()), VL.size()) || + ReuseShuffleIndices.empty()) && + "Reshuffling scalars not yet supported for nodes with padding"); Last->ReuseShuffleIndices.append(ReuseShuffleIndices.begin(), ReuseShuffleIndices.end()); if (ReorderIndices.empty()) { @@ -5269,7 +5310,7 @@ BoUpSLP::getReorderingData(const TreeEntry &TE, bool TopToBottom) { // node. if (!TE.ReuseShuffleIndices.empty()) { // FIXME: Support ReuseShuffleIndices for non-power-of-two vectors. - assert(!TE.isNonPowOf2Vec() && + assert(!TE.hasNonWholeRegisterOrNonPowerOf2Vec(*TTI) && "Reshuffling scalars not yet supported for nodes with padding"); if (isSplat(TE.Scalars)) @@ -5509,7 +5550,7 @@ BoUpSLP::getReorderingData(const TreeEntry &TE, bool TopToBottom) { } // FIXME: Remove the non-power-of-two check once findReusedOrderedScalars // has been auditted for correctness with non-power-of-two vectors. - if (!TE.isNonPowOf2Vec()) + if (!TE.hasNonWholeRegisterOrNonPowerOf2Vec(*TTI)) if (std::optional CurrentOrder = findReusedOrderedScalars(TE)) return CurrentOrder; } @@ -5662,8 +5703,8 @@ void BoUpSLP::reorderTopToBottom() { }); // Reorder the graph nodes according to their vectorization factor. - for (unsigned VF = VectorizableTree.front()->getVectorFactor(); VF > 1; - VF = bit_ceil(VF) / 2) { + for (unsigned VF = VectorizableTree.front()->getVectorFactor(); + !VFToOrderedEntries.empty() && VF > 1; VF -= 2 - (VF & 1U)) { auto It = VFToOrderedEntries.find(VF); if (It == VFToOrderedEntries.end()) continue; @@ -5671,6 +5712,9 @@ void BoUpSLP::reorderTopToBottom() { // used order and reorder scalar elements in the nodes according to this // mostly used order. ArrayRef OrderedEntries = It->second.getArrayRef(); + // Delete VF entry upon exit. + auto Cleanup = make_scope_exit([&]() { VFToOrderedEntries.erase(It); }); + // All operands are reordered and used only in this node - propagate the // most used order to the user node. 
MapVector VL, unsigned Depth, UniqueValues.emplace_back(V); } size_t NumUniqueScalarValues = UniqueValues.size(); - if (NumUniqueScalarValues == VL.size()) { + bool IsFullVectors = hasFullVectorsOrPowerOf2( + *TTI, UniqueValues.front()->getType(), NumUniqueScalarValues); + if (NumUniqueScalarValues == VL.size() && + (VectorizeNonPowerOf2 || IsFullVectors)) { ReuseShuffleIndices.clear(); } else { // FIXME: Reshuffing scalars is not supported yet for non-power-of-2 ops. - if ((UserTreeIdx.UserTE && UserTreeIdx.UserTE->isNonPowOf2Vec()) || - !llvm::has_single_bit(VL.size())) { + if ((UserTreeIdx.UserTE && + UserTreeIdx.UserTE->hasNonWholeRegisterOrNonPowerOf2Vec(*TTI)) || + !has_single_bit(VL.size())) { LLVM_DEBUG(dbgs() << "SLP: Reshuffling scalars not yet supported " "for nodes with padding.\n"); newTreeEntry(VL, std::nullopt /*not vectorized*/, S, UserTreeIdx); return false; } LLVM_DEBUG(dbgs() << "SLP: Shuffle for reused scalars.\n"); - if (NumUniqueScalarValues <= 1 || - (UniquePositions.size() == 1 && all_of(UniqueValues, - [](Value *V) { - return isa(V) || - !isConstant(V); - })) || - !llvm::has_single_bit(NumUniqueScalarValues)) { + if (NumUniqueScalarValues <= 1 || !IsFullVectors || + (UniquePositions.size() == 1 && all_of(UniqueValues, [](Value *V) { + return isa(V) || !isConstant(V); + }))) { if (DoNotFail && UniquePositions.size() > 1 && NumUniqueScalarValues > 1 && S.MainOp->isSafeToRemove() && all_of(UniqueValues, [=](Value *V) { @@ -7555,7 +7600,9 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, areAllUsersVectorized(cast(V), UserIgnoreList); })) { - unsigned PWSz = PowerOf2Ceil(UniqueValues.size()); + // Find the number of elements, which forms full vectors. + unsigned PWSz = getFullVectorNumberOfElements( + *TTI, UniqueValues.front()->getType(), UniqueValues.size()); if (PWSz == VL.size()) { ReuseShuffleIndices.clear(); } else { @@ -9793,9 +9840,6 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis { return nullptr; Value *VecBase = nullptr; ArrayRef VL = E->Scalars; - // If the resulting type is scalarized, do not adjust the cost. - if (NumParts == VL.size()) - return nullptr; // Check if it can be considered reused if same extractelements were // vectorized already. 
bool PrevNodeFound = any_of( @@ -10450,7 +10494,7 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef VectorizedVals, InsertMask[Idx] = I + 1; } unsigned VecScalarsSz = PowerOf2Ceil(NumElts); - if (NumOfParts > 0) + if (NumOfParts > 0 && NumOfParts < NumElts) VecScalarsSz = PowerOf2Ceil((NumElts + NumOfParts - 1) / NumOfParts); unsigned VecSz = (1 + OffsetEnd / VecScalarsSz - OffsetBeg / VecScalarsSz) * VecScalarsSz; @@ -17785,7 +17829,7 @@ bool SLPVectorizerPass::tryToVectorizeList(ArrayRef VL, BoUpSLP &R, for (unsigned I = NextInst; I < MaxInst; ++I) { unsigned ActualVF = std::min(MaxInst - I, VF); - if (!has_single_bit(ActualVF)) + if (!hasFullVectorsOrPowerOf2(*TTI, ScalarTy, ActualVF)) continue; if (MaxVFOnly && ActualVF < MaxVF) diff --git a/llvm/test/Transforms/SLPVectorizer/reduction-whole-regs-loads.ll b/llvm/test/Transforms/SLPVectorizer/reduction-whole-regs-loads.ll index 281b5f99540ea..4074b8654362e 100644 --- a/llvm/test/Transforms/SLPVectorizer/reduction-whole-regs-loads.ll +++ b/llvm/test/Transforms/SLPVectorizer/reduction-whole-regs-loads.ll @@ -1,21 +1,29 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -passes=slp-vectorizer -S -mtriple=riscv64-unknown-linux -mattr=+v -slp-threshold=-100 | FileCheck %s +; RUN: opt < %s -passes=slp-vectorizer -S -mtriple=riscv64-unknown-linux -mattr=+v -slp-threshold=-100 | FileCheck %s --check-prefix=RISCV ; RUN: opt < %s -passes=slp-vectorizer -S -mtriple=x86_64-unknown-linux -slp-threshold=-100 | FileCheck %s ; RUN: opt < %s -passes=slp-vectorizer -S -mtriple=aarch64-unknown-linux -slp-threshold=-100 | FileCheck %s ; REQUIRES: aarch64-registered-target, x86-registered-target, riscv-registered-target define i64 @test(ptr %p) { +; RISCV-LABEL: @test( +; RISCV-NEXT: entry: +; RISCV-NEXT: [[ARRAYIDX_4:%.*]] = getelementptr inbounds i64, ptr [[P:%.*]], i64 4 +; RISCV-NEXT: [[TMP0:%.*]] = load <4 x i64>, ptr [[P]], align 4 +; RISCV-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr [[ARRAYIDX_4]], align 4 +; RISCV-NEXT: [[TMP2:%.*]] = shufflevector <4 x i64> [[TMP0]], <4 x i64> poison, <8 x i32> +; RISCV-NEXT: [[TMP3:%.*]] = call <8 x i64> @llvm.vector.insert.v8i64.v4i64(<8 x i64> [[TMP2]], <4 x i64> [[TMP0]], i64 0) +; RISCV-NEXT: [[TMP4:%.*]] = call <8 x i64> @llvm.vector.insert.v8i64.v2i64(<8 x i64> [[TMP3]], <2 x i64> [[TMP1]], i64 4) +; RISCV-NEXT: [[TMP5:%.*]] = mul <8 x i64> [[TMP4]], +; RISCV-NEXT: [[TMP6:%.*]] = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> [[TMP5]]) +; RISCV-NEXT: ret i64 [[TMP6]] +; ; CHECK-LABEL: @test( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[ARRAYIDX_4:%.*]] = getelementptr inbounds i64, ptr [[P:%.*]], i64 4 -; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i64>, ptr [[P]], align 4 -; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr [[ARRAYIDX_4]], align 4 -; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i64> [[TMP0]], <4 x i64> poison, <8 x i32> -; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i64> @llvm.vector.insert.v8i64.v4i64(<8 x i64> [[TMP2]], <4 x i64> [[TMP0]], i64 0) -; CHECK-NEXT: [[TMP4:%.*]] = call <8 x i64> @llvm.vector.insert.v8i64.v2i64(<8 x i64> [[TMP3]], <2 x i64> [[TMP1]], i64 4) -; CHECK-NEXT: [[TMP5:%.*]] = mul <8 x i64> [[TMP4]], -; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> [[TMP5]]) -; CHECK-NEXT: ret i64 [[TMP6]] +; CHECK-NEXT: [[TMP0:%.*]] = load <6 x i64>, ptr [[P:%.*]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <6 x i64> [[TMP0]], <6 x i64> poison, <8 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = mul <8 x i64> [[TMP1]], +; CHECK-NEXT: 
[[TMP3:%.*]] = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> [[TMP2]]) +; CHECK-NEXT: ret i64 [[TMP3]] ; entry: %arrayidx.1 = getelementptr inbounds i64, ptr %p, i64 1 From 6fae8b8a4266461e81da12d2a7889ff35aac6526 Mon Sep 17 00:00:00 2001 From: Chris Apple Date: Wed, 25 Sep 2024 07:44:30 -0700 Subject: [PATCH 178/212] [rtsan][NFC] Rename RTSAN_LINK_LIBS to RTSAN_DYNAMIC_LIBS (#109991) Follow on to #109715 This better matches this same variable in asan, ubsan, hwasan, and nsan. Shows the logical coupling, and describes them as "dynamic only" which is their intent. --- compiler-rt/lib/rtsan/CMakeLists.txt | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/compiler-rt/lib/rtsan/CMakeLists.txt b/compiler-rt/lib/rtsan/CMakeLists.txt index d4296f56acd30..b7e2362d31352 100644 --- a/compiler-rt/lib/rtsan/CMakeLists.txt +++ b/compiler-rt/lib/rtsan/CMakeLists.txt @@ -25,7 +25,7 @@ set(RTSAN_CFLAGS ${COMPILER_RT_CXX_CFLAGS} -DSANITIZER_COMMON_NO_REDEFINE_BUILTINS) set(RTSAN_LINK_FLAGS ${COMPILER_RT_COMMON_LINK_FLAGS}) -set(RTSAN_LINK_LIBS +set(RTSAN_DYNAMIC_LIBS ${COMPILER_RT_UNWINDER_LINK_LIBS} ${SANITIZER_CXX_ABI_LIBRARIES} ${SANITIZER_COMMON_LINK_LIBS}) @@ -61,11 +61,11 @@ set(RTSAN_COMMON_RUNTIME_OBJECT_LIBS RTSanitizerCommonCoverage RTSanitizerCommonSymbolizer) -append_list_if(COMPILER_RT_HAS_LIBDL dl RTSAN_LINK_LIBS) -append_list_if(COMPILER_RT_HAS_LIBRT rt RTSAN_LINK_LIBS) -append_list_if(COMPILER_RT_HAS_LIBM m RTSAN_LINK_LIBS) -append_list_if(COMPILER_RT_HAS_LIBPTHREAD pthread RTSAN_LINK_LIBS) -append_list_if(COMPILER_RT_HAS_LIBLOG log RTSAN_LINK_LIBS) +append_list_if(COMPILER_RT_HAS_LIBDL dl RTSAN_DYNAMIC_LIBS) +append_list_if(COMPILER_RT_HAS_LIBRT rt RTSAN_DYNAMIC_LIBS) +append_list_if(COMPILER_RT_HAS_LIBM m RTSAN_DYNAMIC_LIBS) +append_list_if(COMPILER_RT_HAS_LIBPTHREAD pthread RTSAN_DYNAMIC_LIBS) +append_list_if(COMPILER_RT_HAS_LIBLOG log RTSAN_DYNAMIC_LIBS) add_compiler_rt_component(rtsan) @@ -80,7 +80,7 @@ if (APPLE) OBJECT_LIBS RTRtsan ${RTSAN_COMMON_RUNTIME_OBJECT_LIBS} LINK_FLAGS ${RTSAN_LINK_FLAGS} - LINK_LIBS ${RTSAN_LINK_LIBS} + LINK_LIBS ${RTSAN_DYNAMIC_LIBS} PARENT_TARGET rtsan) else() add_compiler_rt_runtime(clang_rt.rtsan From 3be8e3ad0c424dbeb9e4c8401174335e106a2d5d Mon Sep 17 00:00:00 2001 From: jeanPerier Date: Wed, 25 Sep 2024 16:45:47 +0200 Subject: [PATCH 179/212] [flang] translate pure and elemental attribute in FIR (#109954) Follow-up from a previous patch that turned bind_c into an enum for procedure attribute. This patch carries the elemental and pure Fortran attribute into FIR so that the optimizer can leverage that info in the future (I think debug info may also need to know these aspects since DWARF has DW_AT_elemental and DW_AT_pure nodes). SIMPLE from F2023 will be translated once it is handled in the front-end. NON_RECURSIVE is only meaningful on func.func since we are not guaranteed to know that aspect on the caller side (it is not part of Fortran characteristics). There is a DW_AT_recursive DWARF node. I will do it while dealing with func.func attributes. 
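As a rough illustration (a hypothetical snippet, not taken from the tests
below; the names `%a`, `%b`, `@_QPelem` and the exact attribute spelling are
illustrative): for a procedure declared as

    real elemental function elem(a, b)

lowering a reference to `elem` is now expected to tag the `fir.call` with the
combined flags, along the lines of

    %res = fir.call @_QPelem(%a, %b) proc_attrs<elemental, pure> fastmath<contract> : (!fir.ref<f32>, !fir.ref<f32>) -> f32

where previously only `bind_c` was translated.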
--- flang/include/flang/Lower/CallInterface.h | 9 ++++++++ .../flang/Optimizer/Dialect/FIRAttr.td | 2 ++ flang/lib/Lower/CallInterface.cpp | 23 +++++++++++++++++++ flang/lib/Lower/ConvertCall.cpp | 8 ++----- .../HLFIR/array-ctor-as-elemental-nested.f90 | 2 +- .../Lower/HLFIR/array-ctor-as-elemental.f90 | 2 +- .../test/Lower/HLFIR/elemental-array-ops.f90 | 2 +- .../HLFIR/elemental-user-procedure-ref.f90 | 14 +++++------ flang/test/Lower/HLFIR/forall.f90 | 12 +++++----- flang/test/Lower/HLFIR/where-nonelemental.f90 | 6 ++--- .../test/Lower/array-elemental-calls-char.f90 | 2 +- .../test/Lower/array-user-def-assignments.f90 | 2 +- 12 files changed, 57 insertions(+), 27 deletions(-) diff --git a/flang/include/flang/Lower/CallInterface.h b/flang/include/flang/Lower/CallInterface.h index 9a688330e8bd2..1fb390455733f 100644 --- a/flang/include/flang/Lower/CallInterface.h +++ b/flang/include/flang/Lower/CallInterface.h @@ -42,6 +42,10 @@ namespace mlir { class Location; } +namespace fir { +class FortranProcedureFlagsEnumAttr; +} + namespace Fortran::lower { class AbstractConverter; class SymMap; @@ -235,6 +239,11 @@ class CallInterface { return characteristic && characteristic->CanBeCalledViaImplicitInterface(); } + /// Translate Fortran procedure attributes into FIR attribute. + /// Return attribute is nullptr if the procedure has no attributes. + fir::FortranProcedureFlagsEnumAttr + getProcedureAttrs(mlir::MLIRContext *) const; + protected: CallInterface(Fortran::lower::AbstractConverter &c) : converter{c} {} /// CRTP handle. diff --git a/flang/include/flang/Optimizer/Dialect/FIRAttr.td b/flang/include/flang/Optimizer/Dialect/FIRAttr.td index 6400756b38448..4e84959a3b3e1 100644 --- a/flang/include/flang/Optimizer/Dialect/FIRAttr.td +++ b/flang/include/flang/Optimizer/Dialect/FIRAttr.td @@ -62,6 +62,8 @@ def fir_FortranVariableFlagsAttr : fir_Attr<"FortranVariableFlags"> { /// Fortran procedure attributes (F2023 15.6.2.1). BIND attribute (18.3.7) /// is also tracked in the same enum. Recursive (resp. Impure) attribute /// is implied by the absence of opposite NonRecursive (resp. Pure) attribute. +/// Beware that "elemental" does not implicitly imply "pure" as it does in +/// Fortran, "pure" must be made explicit when generating the FIR attribute. def FIRfuncNoAttributes : I32BitEnumAttrCaseNone<"none">; def FIRfuncElemental : I32BitEnumAttrCaseBit<"elemental", 0>; def FIRfuncPure : I32BitEnumAttrCaseBit<"pure", 1>; diff --git a/flang/lib/Lower/CallInterface.cpp b/flang/lib/Lower/CallInterface.cpp index c0ef96adc20c3..f541f84738291 100644 --- a/flang/lib/Lower/CallInterface.cpp +++ b/flang/lib/Lower/CallInterface.cpp @@ -1546,6 +1546,29 @@ Fortran::lower::CallInterface::getResultType() const { return types; } +template +fir::FortranProcedureFlagsEnumAttr +Fortran::lower::CallInterface::getProcedureAttrs( + mlir::MLIRContext *mlirContext) const { + if (characteristic) { + fir::FortranProcedureFlagsEnum flags = fir::FortranProcedureFlagsEnum::none; + if (characteristic->IsBindC()) + flags = flags | fir::FortranProcedureFlagsEnum::bind_c; + if (characteristic->IsPure()) + flags = flags | fir::FortranProcedureFlagsEnum::pure; + if (characteristic->IsElemental()) + flags = flags | fir::FortranProcedureFlagsEnum::elemental; + // TODO: + // - SIMPLE: F2023, not yet handled by semantics. + // - NON_RECURSIVE: not part of the characteristics. Maybe this should + // simply not be part of FortranProcedureFlagsEnum since cannot accurately + // be known on the caller side. 
+ if (flags != fir::FortranProcedureFlagsEnum::none) + return fir::FortranProcedureFlagsEnumAttr::get(mlirContext, flags); + } + return nullptr; +} + template class Fortran::lower::CallInterface; template class Fortran::lower::CallInterface; diff --git a/flang/lib/Lower/ConvertCall.cpp b/flang/lib/Lower/ConvertCall.cpp index 017bfd049d3dc..ee5eb225f0d7e 100644 --- a/flang/lib/Lower/ConvertCall.cpp +++ b/flang/lib/Lower/ConvertCall.cpp @@ -631,13 +631,9 @@ std::pair Fortran::lower::genCallOpAndResult( if (callNumResults != 0) callResult = dispatch.getResult(0); } else { - // TODO: gather other procedure attributes. - fir::FortranProcedureFlagsEnumAttr procAttrs; - if (caller.characterize().IsBindC()) - procAttrs = fir::FortranProcedureFlagsEnumAttr::get( - builder.getContext(), fir::FortranProcedureFlagsEnum::bind_c); - // Standard procedure call with fir.call. + fir::FortranProcedureFlagsEnumAttr procAttrs = + caller.getProcedureAttrs(builder.getContext()); auto call = builder.create( loc, funcType.getResults(), funcSymbolAttr, operands, procAttrs); diff --git a/flang/test/Lower/HLFIR/array-ctor-as-elemental-nested.f90 b/flang/test/Lower/HLFIR/array-ctor-as-elemental-nested.f90 index a30c6c6e4a227..1dc033d0ba033 100644 --- a/flang/test/Lower/HLFIR/array-ctor-as-elemental-nested.f90 +++ b/flang/test/Lower/HLFIR/array-ctor-as-elemental-nested.f90 @@ -31,7 +31,7 @@ ! CHECK: %[[VAL_21:.*]]:3 = hlfir.associate %[[VAL_22:.*]](%[[VAL_17]]) {adapt.valuebyref} : (!hlfir.expr<2xf32>, !fir.shape<1>) -> (!fir.ref>, !fir.ref>, i1) ! CHECK: %[[VAL_23:.*]] = fir.embox %[[VAL_21]]#0(%[[VAL_17]]) : (!fir.ref>, !fir.shape<1>) -> !fir.box> ! CHECK: %[[VAL_24:.*]] = fir.convert %[[VAL_23]] : (!fir.box>) -> !fir.box> -! CHECK: %[[VAL_25:.*]] = fir.call @_QPfoo(%[[VAL_24]]) fastmath : (!fir.box>) -> f32 +! CHECK: %[[VAL_25:.*]] = fir.call @_QPfoo(%[[VAL_24]]) proc_attrs fastmath : (!fir.box>) -> f32 ! CHECK: hlfir.end_associate %[[VAL_21]]#1, %[[VAL_21]]#2 : !fir.ref>, i1 ! CHECK: hlfir.destroy %[[VAL_22]] : !hlfir.expr<2xf32> ! CHECK: hlfir.yield_element %[[VAL_25]] : f32 diff --git a/flang/test/Lower/HLFIR/array-ctor-as-elemental.f90 b/flang/test/Lower/HLFIR/array-ctor-as-elemental.f90 index 277e2683c64f8..4d3f93c7d48ce 100644 --- a/flang/test/Lower/HLFIR/array-ctor-as-elemental.f90 +++ b/flang/test/Lower/HLFIR/array-ctor-as-elemental.f90 @@ -107,7 +107,7 @@ integer pure function foo(i) ! CHECK: %[[VAL_13:.*]] = arith.addi %[[VAL_5]], %[[VAL_12]] : index ! CHECK: %[[VAL_14:.*]] = fir.convert %[[VAL_13]] : (index) -> i64 ! CHECK: %[[VAL_15:.*]] = fir.convert %[[VAL_14]] : (i64) -> i32 -! CHECK: %[[VAL_16:.*]] = fir.call @_QPfoo(%[[VAL_15]]) fastmath : (i32) -> i32 +! CHECK: %[[VAL_16:.*]] = fir.call @_QPfoo(%[[VAL_15]]) proc_attrs fastmath : (i32) -> i32 ! CHECK: hlfir.yield_element %[[VAL_16]] : i32 ! CHECK: } ! CHECK: %[[VAL_17:.*]]:3 = hlfir.associate %[[VAL_18:.*]](%[[VAL_3]]) {adapt.valuebyref} : (!hlfir.expr<4xi32>, !fir.shape<1>) -> (!fir.ref>, !fir.ref>, i1) diff --git a/flang/test/Lower/HLFIR/elemental-array-ops.f90 b/flang/test/Lower/HLFIR/elemental-array-ops.f90 index 18e1fb0a787e7..aefc4d978a27d 100644 --- a/flang/test/Lower/HLFIR/elemental-array-ops.f90 +++ b/flang/test/Lower/HLFIR/elemental-array-ops.f90 @@ -182,7 +182,7 @@ end subroutine char_return ! CHECK: %[[VAL_23:.*]] = arith.constant 0 : index ! CHECK: %[[VAL_24:.*]] = arith.cmpi sgt, %[[VAL_22]], %[[VAL_23]] : index ! CHECK: %[[VAL_25:.*]] = arith.select %[[VAL_24]], %[[VAL_22]], %[[VAL_23]] : index -! 
CHECK: %[[VAL_27:.*]] = fir.call @_QPcallee(%[[VAL_2]], %[[VAL_25]], %[[VAL_20]]) fastmath : (!fir.ref>, index, !fir.boxchar<1>) -> !fir.boxchar<1> +! CHECK: %[[VAL_27:.*]] = fir.call @_QPcallee(%[[VAL_2]], %[[VAL_25]], %[[VAL_20]]) proc_attrs fastmath : (!fir.ref>, index, !fir.boxchar<1>) -> !fir.boxchar<1> ! CHECK: %[[VAL_28:.*]]:2 = hlfir.declare %[[VAL_2]] typeparams %[[VAL_25]] {uniq_name = ".tmp.func_result"} : (!fir.ref>, index) -> (!fir.ref>, !fir.ref>) ! CHECK: %[[MustFree:.*]] = arith.constant false ! CHECK: %[[ResultTemp:.*]] = hlfir.as_expr %[[VAL_28]]#0 move %[[MustFree]] : (!fir.ref>, i1) -> !hlfir.expr> diff --git a/flang/test/Lower/HLFIR/elemental-user-procedure-ref.f90 b/flang/test/Lower/HLFIR/elemental-user-procedure-ref.f90 index aea23d8d94672..d4d8b858aaeea 100644 --- a/flang/test/Lower/HLFIR/elemental-user-procedure-ref.f90 +++ b/flang/test/Lower/HLFIR/elemental-user-procedure-ref.f90 @@ -18,7 +18,7 @@ real elemental function elem(a, b) ! CHECK: %[[VAL_6:.*]] = hlfir.elemental %[[VAL_4]] unordered : (!fir.shape<1>) -> !hlfir.expr<100xf32> { ! CHECK: ^bb0(%[[VAL_7:.*]]: index): ! CHECK: %[[VAL_8:.*]] = hlfir.designate %[[VAL_5]]#0 (%[[VAL_7]]) : (!fir.ref>, index) -> !fir.ref -! CHECK: %[[VAL_9:.*]] = fir.call @_QPelem(%[[VAL_2]]#1, %[[VAL_8]]) fastmath : (!fir.ref, !fir.ref) -> f32 +! CHECK: %[[VAL_9:.*]] = fir.call @_QPelem(%[[VAL_2]]#1, %[[VAL_8]]) proc_attrs fastmath : (!fir.ref, !fir.ref) -> f32 ! CHECK: hlfir.yield_element %[[VAL_9]] : f32 ! CHECK: } ! CHECK: fir.call @@ -43,7 +43,7 @@ real elemental function elem_val(a, b) ! CHECK: ^bb0(%[[VAL_9:.*]]: index, %[[VAL_10:.*]]: index): ! CHECK: %[[VAL_11:.*]] = hlfir.designate %[[VAL_6]]#0 (%[[VAL_9]], %[[VAL_10]]) : (!fir.ref>, index, index) -> !fir.ref ! CHECK: %[[VAL_12:.*]] = fir.load %[[VAL_11]] : !fir.ref -! CHECK: %[[VAL_13:.*]] = fir.call @_QPelem_val(%[[VAL_7]], %[[VAL_12]]) fastmath : (i32, f32) -> f32 +! CHECK: %[[VAL_13:.*]] = fir.call @_QPelem_val(%[[VAL_7]], %[[VAL_12]]) proc_attrs fastmath : (i32, f32) -> f32 ! CHECK: hlfir.yield_element %[[VAL_13]] : f32 ! CHECK: } ! CHECK: fir.call @@ -67,7 +67,7 @@ real elemental function char_elem(a, b) ! CHECK: %[[VAL_9:.*]] = hlfir.elemental %[[VAL_7]] unordered : (!fir.shape<1>) -> !hlfir.expr<100xf32> { ! CHECK: ^bb0(%[[VAL_10:.*]]: index): ! CHECK: %[[VAL_11:.*]] = hlfir.designate %[[VAL_8]]#0 (%[[VAL_10]]) typeparams %[[VAL_4]]#1 : (!fir.box>>, index, index) -> !fir.boxchar<1> -! CHECK: %[[VAL_12:.*]] = fir.call @_QPchar_elem(%[[VAL_3]]#0, %[[VAL_11]]) fastmath : (!fir.boxchar<1>, !fir.boxchar<1>) -> f32 +! CHECK: %[[VAL_12:.*]] = fir.call @_QPchar_elem(%[[VAL_3]]#0, %[[VAL_11]]) proc_attrs fastmath : (!fir.boxchar<1>, !fir.boxchar<1>) -> f32 ! CHECK: hlfir.yield_element %[[VAL_12]] : f32 ! CHECK: } ! CHECK: fir.call @@ -93,7 +93,7 @@ elemental subroutine elem_sub(a, b) ! CHECK: fir.do_loop %[[VAL_8:.*]] = %[[VAL_7]] to %[[VAL_4]] step %[[VAL_7]] unordered { ! CHECK: fir.do_loop %[[VAL_9:.*]] = %[[VAL_7]] to %[[VAL_3]] step %[[VAL_7]] unordered { ! CHECK: %[[VAL_10:.*]] = hlfir.designate %[[VAL_6]]#0 (%[[VAL_9]], %[[VAL_8]]) : (!fir.ref>, index, index) -> !fir.ref -! CHECK: fir.call @_QPelem_sub(%[[VAL_2]]#1, %[[VAL_10]]) fastmath : (!fir.ref, !fir.ref) -> () +! CHECK: fir.call @_QPelem_sub(%[[VAL_2]]#1, %[[VAL_10]]) proc_attrs fastmath : (!fir.ref, !fir.ref) -> () ! CHECK: } ! CHECK: } @@ -116,7 +116,7 @@ impure elemental subroutine impure_elem(a) ! CHECK: fir.do_loop %[[VAL_6:.*]] = %[[VAL_5]] to %[[VAL_2]] step %[[VAL_5]] { ! 
CHECK: fir.do_loop %[[VAL_7:.*]] = %[[VAL_5]] to %[[VAL_1]] step %[[VAL_5]] { ! CHECK: %[[VAL_8:.*]] = hlfir.designate %[[VAL_4]]#0 (%[[VAL_7]], %[[VAL_6]]) : (!fir.ref>, index, index) -> !fir.ref -! CHECK: fir.call @_QPimpure_elem(%[[VAL_8]]) fastmath : (!fir.ref) -> () +! CHECK: fir.call @_QPimpure_elem(%[[VAL_8]]) proc_attrs fastmath : (!fir.ref) -> () ! CHECK: } ! CHECK: } ! CHECK: return @@ -141,7 +141,7 @@ elemental subroutine ordered_elem(a) ! CHECK: fir.do_loop %[[VAL_6:.*]] = %[[VAL_5]] to %[[VAL_2]] step %[[VAL_5]] { ! CHECK: fir.do_loop %[[VAL_7:.*]] = %[[VAL_5]] to %[[VAL_1]] step %[[VAL_5]] { ! CHECK: %[[VAL_8:.*]] = hlfir.designate %[[VAL_4]]#0 (%[[VAL_7]], %[[VAL_6]]) : (!fir.ref>, index, index) -> !fir.ref -! CHECK: fir.call @_QPordered_elem(%[[VAL_8]]) fastmath : (!fir.ref) -> () +! CHECK: fir.call @_QPordered_elem(%[[VAL_8]]) proc_attrs fastmath : (!fir.ref) -> () ! CHECK: } ! CHECK: } ! CHECK: return @@ -174,7 +174,7 @@ impure elemental subroutine impure_elem(a) ! CHECK: fir.do_loop %[[VAL_14:.*]] = %[[VAL_13]] to %[[VAL_2]] step %[[VAL_13]] { ! CHECK: fir.do_loop %[[VAL_15:.*]] = %[[VAL_13]] to %[[VAL_1]] step %[[VAL_13]] { ! CHECK: %[[VAL_16:.*]] = hlfir.designate %[[VAL_11]]#0 (%[[VAL_15]], %[[VAL_14]]) : (!fir.ref>, index, index) -> !fir.ref -! CHECK: fir.call @_QPimpure_elem(%[[VAL_16]]) fastmath : (!fir.ref) -> () +! CHECK: fir.call @_QPimpure_elem(%[[VAL_16]]) proc_attrs fastmath : (!fir.ref) -> () ! CHECK: } ! CHECK: } ! CHECK: hlfir.end_associate %[[VAL_11]]#1, %[[VAL_11]]#2 : !fir.ref>, i1 diff --git a/flang/test/Lower/HLFIR/forall.f90 b/flang/test/Lower/HLFIR/forall.f90 index c12f0c6a826b5..709e233746a91 100644 --- a/flang/test/Lower/HLFIR/forall.f90 +++ b/flang/test/Lower/HLFIR/forall.f90 @@ -86,7 +86,7 @@ subroutine test_forall_mask() ! CHECK: } (%[[VAL_9:.*]]: i64) { ! CHECK: %[[VAL_10:.*]] = hlfir.forall_index "i" %[[VAL_9]] : (i64) -> !fir.ref ! CHECK: hlfir.forall_mask { -! CHECK: %[[VAL_11:.*]] = fir.call @_QPpredicate(%[[VAL_10]]) fastmath : (!fir.ref) -> !fir.logical<4> +! CHECK: %[[VAL_11:.*]] = fir.call @_QPpredicate(%[[VAL_10]]) proc_attrs fastmath : (!fir.ref) -> !fir.logical<4> ! CHECK: %[[VAL_12:.*]] = fir.convert %[[VAL_11]] : (!fir.logical<4>) -> i1 ! CHECK: hlfir.yield %[[VAL_12]] : i1 ! CHECK: } do { @@ -113,8 +113,8 @@ subroutine test_forall_several_indices() ! CHECK: %[[VAL_6:.*]]:2 = hlfir.declare {{.*}}Ey ! CHECK: %[[VAL_7:.*]] = fir.call @_QPibar() fastmath : () -> i32 ! CHECK: %[[VAL_8:.*]] = fir.call @_QPifoo() fastmath : () -> i32 -! CHECK: %[[VAL_9:.*]] = fir.call @_QPjfoo() fastmath : () -> i64 -! CHECK: %[[VAL_10:.*]] = fir.call @_QPjbar() fastmath : () -> i64 +! CHECK: %[[VAL_9:.*]] = fir.call @_QPjfoo() proc_attrs fastmath : () -> i64 +! CHECK: %[[VAL_10:.*]] = fir.call @_QPjbar() proc_attrs fastmath : () -> i64 ! CHECK: hlfir.forall lb { ! CHECK: hlfir.yield %[[VAL_7]] : i32 ! CHECK: } ub { @@ -126,7 +126,7 @@ subroutine test_forall_several_indices() ! CHECK: hlfir.yield %[[VAL_10]] : i64 ! CHECK: } (%[[VAL_12:.*]]: i64) { ! CHECK: hlfir.region_assign { -! CHECK: %[[VAL_13:.*]] = fir.call @_QPifoo2(%[[VAL_11]], %[[VAL_12]]) fastmath : (i64, i64) -> i64 +! CHECK: %[[VAL_13:.*]] = fir.call @_QPifoo2(%[[VAL_11]], %[[VAL_12]]) proc_attrs fastmath : (i64, i64) -> i64 ! CHECK: %[[VAL_14:.*]] = hlfir.designate %[[VAL_6]]#0 (%[[VAL_13]]) : (!fir.ref>, i64) -> !fir.ref ! CHECK: %[[VAL_15:.*]] = fir.load %[[VAL_14]] : !fir.ref ! CHECK: hlfir.yield %[[VAL_15]] : i32 @@ -169,10 +169,10 @@ subroutine test_nested_foralls() ! 
CHECK: hlfir.yield %[[VAL_12]] : !fir.ref ! CHECK: } ! CHECK: hlfir.forall lb { -! CHECK: %[[VAL_13:.*]] = fir.call @_QPjfoo() fastmath : () -> i64 +! CHECK: %[[VAL_13:.*]] = fir.call @_QPjfoo() proc_attrs fastmath : () -> i64 ! CHECK: hlfir.yield %[[VAL_13]] : i64 ! CHECK: } ub { -! CHECK: %[[VAL_14:.*]] = fir.call @_QPjbar() fastmath : () -> i64 +! CHECK: %[[VAL_14:.*]] = fir.call @_QPjbar() proc_attrs fastmath : () -> i64 ! CHECK: hlfir.yield %[[VAL_14]] : i64 ! CHECK: } (%[[VAL_15:.*]]: i64) { ! CHECK: hlfir.region_assign { diff --git a/flang/test/Lower/HLFIR/where-nonelemental.f90 b/flang/test/Lower/HLFIR/where-nonelemental.f90 index 15a281b0ba681..643f417c47674 100644 --- a/flang/test/Lower/HLFIR/where-nonelemental.f90 +++ b/flang/test/Lower/HLFIR/where-nonelemental.f90 @@ -125,7 +125,7 @@ integer pure function pure_ifoo() ! CHECK: hlfir.where { ! CHECK: %[[VAL_21:.*]] = llvm.intr.stacksave : !llvm.ptr ! CHECK-NOT: hlfir.exactly_once -! CHECK: %[[VAL_23:.*]] = fir.call @_QPpure_logical_func1() fastmath : () -> !fir.array<100x!fir.logical<4>> +! CHECK: %[[VAL_23:.*]] = fir.call @_QPpure_logical_func1() proc_attrs fastmath : () -> !fir.array<100x!fir.logical<4>> ! CHECK: hlfir.yield %{{.*}} : !hlfir.expr<100x!fir.logical<4>> cleanup { ! CHECK: llvm.intr.stackrestore %[[VAL_21]] : !llvm.ptr ! CHECK: } @@ -173,7 +173,7 @@ integer pure function pure_ifoo() ! CHECK: hlfir.elsewhere mask { ! CHECK: %[[VAL_129:.*]] = hlfir.exactly_once : !hlfir.expr<100x!fir.logical<4>> { ! CHECK: %[[VAL_139:.*]] = llvm.intr.stacksave : !llvm.ptr -! CHECK: %[[VAL_141:.*]] = fir.call @_QPpure_logical_func2() fastmath : () -> !fir.array<100x!fir.logical<4>> +! CHECK: %[[VAL_141:.*]] = fir.call @_QPpure_logical_func2() proc_attrs fastmath : () -> !fir.array<100x!fir.logical<4>> ! CHECK: hlfir.yield %{{.*}} : !hlfir.expr<100x!fir.logical<4>> cleanup { ! CHECK: llvm.intr.stackrestore %[[VAL_139]] : !llvm.ptr ! CHECK: } @@ -185,7 +185,7 @@ integer pure function pure_ifoo() ! CHECK: hlfir.yield %{{.*}} : !fir.box> ! CHECK: } to { ! CHECK: %[[VAL_165:.*]] = hlfir.exactly_once : i32 { -! CHECK: %[[VAL_166:.*]] = fir.call @_QPpure_ifoo() fastmath : () -> i32 +! CHECK: %[[VAL_166:.*]] = fir.call @_QPpure_ifoo() proc_attrs fastmath : () -> i32 ! CHECK: hlfir.yield %[[VAL_166]] : i32 ! CHECK: } ! CHECK: hlfir.designate diff --git a/flang/test/Lower/array-elemental-calls-char.f90 b/flang/test/Lower/array-elemental-calls-char.f90 index 652e79232c1b5..603cc677805fc 100644 --- a/flang/test/Lower/array-elemental-calls-char.f90 +++ b/flang/test/Lower/array-elemental-calls-char.f90 @@ -123,7 +123,7 @@ subroutine foo2b(i, j, c) ! CHECK: %[[VAL_13:.*]] = fir.emboxchar %[[VAL_7]], %[[VAL_3]] : (!fir.ref>, index) -> !fir.boxchar<1> ! CHECK: %[[VAL_14:.*]] = arith.addi %[[VAL_9]], %[[VAL_5]] : index ! CHECK: %[[VAL_15:.*]] = fir.array_coor %[[VAL_1]](%[[VAL_8]]) %[[VAL_14]] : (!fir.ref>, !fir.shape<1>, index) -> !fir.ref -! CHECK: %[[VAL_16:.*]] = fir.call @_QPelem2(%[[VAL_13]], %[[VAL_15]]) fastmath : (!fir.boxchar<1>, !fir.ref) -> i32 +! CHECK: %[[VAL_16:.*]] = fir.call @_QPelem2(%[[VAL_13]], %[[VAL_15]]) proc_attrs fastmath : (!fir.boxchar<1>, !fir.ref) -> i32 ! CHECK: %[[VAL_17:.*]] = fir.array_coor %[[VAL_0]](%[[VAL_8]]) %[[VAL_14]] : (!fir.ref>, !fir.shape<1>, index) -> !fir.ref ! CHECK: fir.store %[[VAL_16]] to %[[VAL_17]] : !fir.ref ! 
CHECK: %[[VAL_18:.*]] = arith.subi %[[VAL_10]], %[[VAL_5]] : index diff --git a/flang/test/Lower/array-user-def-assignments.f90 b/flang/test/Lower/array-user-def-assignments.f90 index 97090ff77678c..e88bc2fb861ba 100644 --- a/flang/test/Lower/array-user-def-assignments.f90 +++ b/flang/test/Lower/array-user-def-assignments.f90 @@ -442,7 +442,7 @@ elemental subroutine sto_char(a,b) ! CHECK: %[[V_6:[0-9]+]] = fir.do_loop %arg2 = %[[V_2]] to %[[V_3]] step %[[C_1]] unordered iter_args(%arg3 = %[[V_5]]) -> (!fir.array<10x!fir.logical<4>>) { ! CHECK: %[[V_7:[0-9]+]] = fir.convert %arg2 : (index) -> i32 ! CHECK: fir.store %[[V_7]] to %[[V_1:[0-9]+]] : !fir.ref -! CHECK: %[[V_8:[0-9]+]] = fir.call @_QPreturns_alloc(%[[V_1]]) fastmath : (!fir.ref) -> !fir.box> +! CHECK: %[[V_8:[0-9]+]] = fir.call @_QPreturns_alloc(%[[V_1]]) proc_attrs fastmath : (!fir.ref) -> !fir.box> ! CHECK: fir.save_result %[[V_8]] to %[[V_0:[0-9]+]] : !fir.box>, !fir.ref>> ! CHECK: %[[V_9:[0-9]+]] = fir.load %[[V_0:[0-9]+]] : !fir.ref>> ! CHECK: %[[V_10:[0-9]+]] = fir.box_addr %[[V_9:[0-9]+]] : (!fir.box>) -> !fir.heap From a024a0ceedae886c254b496c9321f9ef253cd7f8 Mon Sep 17 00:00:00 2001 From: Timm Baeder Date: Wed, 25 Sep 2024 16:46:46 +0200 Subject: [PATCH 180/212] [clang][bytecode] Override InConstantContext flag for immediate calls (#109967) And fix the diagnostics for __builtin_is_constant_evaluated(). We can be in a non-constant context, but calling an immediate function always makes the context constant for the duration of that call. --- clang/lib/AST/ByteCode/Interp.cpp | 1 + clang/lib/AST/ByteCode/InterpBuiltin.cpp | 15 +++++++------- clang/lib/AST/ByteCode/InterpState.cpp | 7 +++++++ clang/lib/AST/ByteCode/InterpState.h | 24 ++++++++++++++++++++++- clang/test/CodeGenCXX/cxx2a-consteval.cpp | 8 ++++++++ 5 files changed, 47 insertions(+), 8 deletions(-) diff --git a/clang/lib/AST/ByteCode/Interp.cpp b/clang/lib/AST/ByteCode/Interp.cpp index b9c85626ffa99..2f4a05a85753c 100644 --- a/clang/lib/AST/ByteCode/Interp.cpp +++ b/clang/lib/AST/ByteCode/Interp.cpp @@ -1136,6 +1136,7 @@ bool Call(InterpState &S, CodePtr OpPC, const Function *Func, InterpFrame *FrameBefore = S.Current; S.Current = NewFrame.get(); + InterpStateCCOverride CCOverride(S, Func->getDecl()->isImmediateFunction()); APValue CallResult; // Note that we cannot assert(CallResult.hasValue()) here since // Ret() above only sets the APValue if the curent frame doesn't diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index 68710f67be200..82ed6d9e7a2ff 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -136,16 +136,17 @@ static bool retPrimValue(InterpState &S, CodePtr OpPC, APValue &Result, static bool interp__builtin_is_constant_evaluated(InterpState &S, CodePtr OpPC, const InterpFrame *Frame, const CallExpr *Call) { + unsigned Depth = S.Current->getDepth(); + auto isStdCall = [](const FunctionDecl *F) -> bool { + return F && F->isInStdNamespace() && F->getIdentifier() && + F->getIdentifier()->isStr("is_constant_evaluated"); + }; + const InterpFrame *Caller = Frame->Caller; // The current frame is the one for __builtin_is_constant_evaluated. // The one above that, potentially the one for std::is_constant_evaluated(). 
if (S.inConstantContext() && !S.checkingPotentialConstantExpression() && - Frame->Caller && S.getEvalStatus().Diag) { - auto isStdCall = [](const FunctionDecl *F) -> bool { - return F && F->isInStdNamespace() && F->getIdentifier() && - F->getIdentifier()->isStr("is_constant_evaluated"); - }; - const InterpFrame *Caller = Frame->Caller; - + S.getEvalStatus().Diag && + (Depth == 1 || (Depth == 2 && isStdCall(Caller->getCallee())))) { if (Caller->Caller && isStdCall(Caller->getCallee())) { const Expr *E = Caller->Caller->getExpr(Caller->getRetPC()); S.report(E->getExprLoc(), diff --git a/clang/lib/AST/ByteCode/InterpState.cpp b/clang/lib/AST/ByteCode/InterpState.cpp index 4ea05305540ee..287c3bd3bca3a 100644 --- a/clang/lib/AST/ByteCode/InterpState.cpp +++ b/clang/lib/AST/ByteCode/InterpState.cpp @@ -19,6 +19,13 @@ InterpState::InterpState(State &Parent, Program &P, InterpStack &Stk, Context &Ctx, SourceMapper *M) : Parent(Parent), M(M), P(P), Stk(Stk), Ctx(Ctx), Current(nullptr) {} +bool InterpState::inConstantContext() const { + if (ConstantContextOverride) + return *ConstantContextOverride; + + return Parent.InConstantContext; +} + InterpState::~InterpState() { while (Current) { InterpFrame *Next = Current->Caller; diff --git a/clang/lib/AST/ByteCode/InterpState.h b/clang/lib/AST/ByteCode/InterpState.h index 4b7371450cc98..2a1311c86a2f2 100644 --- a/clang/lib/AST/ByteCode/InterpState.h +++ b/clang/lib/AST/ByteCode/InterpState.h @@ -77,7 +77,7 @@ class InterpState final : public State, public SourceMapper { bool noteUndefinedBehavior() override { return Parent.noteUndefinedBehavior(); } - bool inConstantContext() const { return Parent.InConstantContext; } + bool inConstantContext() const; bool hasActiveDiagnostic() override { return Parent.hasActiveDiagnostic(); } void setActiveDiagnostic(bool Flag) override { Parent.setActiveDiagnostic(Flag); @@ -116,6 +116,7 @@ class InterpState final : public State, public SourceMapper { private: friend class EvaluationResult; + friend class InterpStateCCOverride; /// AST Walker state. State &Parent; /// Dead block chain. @@ -124,6 +125,7 @@ class InterpState final : public State, public SourceMapper { SourceMapper *M; /// Allocator used for dynamic allocations performed via the program. DynamicAllocator Alloc; + std::optional ConstantContextOverride; public: /// Reference to the module containing all bytecode. @@ -144,6 +146,26 @@ class InterpState final : public State, public SourceMapper { SeenGlobalTemporaries; }; +class InterpStateCCOverride final { +public: + InterpStateCCOverride(InterpState &Ctx, bool Value) + : Ctx(Ctx), OldCC(Ctx.ConstantContextOverride) { + // We only override this if the new value is true. 
+ Enabled = Value; + if (Enabled) + Ctx.ConstantContextOverride = Value; + } + ~InterpStateCCOverride() { + if (Enabled) + Ctx.ConstantContextOverride = OldCC; + } + +private: + bool Enabled; + InterpState &Ctx; + std::optional OldCC; +}; + } // namespace interp } // namespace clang diff --git a/clang/test/CodeGenCXX/cxx2a-consteval.cpp b/clang/test/CodeGenCXX/cxx2a-consteval.cpp index a58a09554699d..bfeabc946da41 100644 --- a/clang/test/CodeGenCXX/cxx2a-consteval.cpp +++ b/clang/test/CodeGenCXX/cxx2a-consteval.cpp @@ -6,6 +6,14 @@ // RUN: %clang_cc1 -emit-llvm %s -Dconsteval="" -std=c++2a -triple x86_64-unknown-linux-gnu -o %t.ll // RUN: FileCheck -check-prefix=EXPR -input-file=%t.ll %s +// RUN: %clang_cc1 -emit-llvm %s -std=c++2a -triple x86_64-unknown-linux-gnu -o %t.ll -fexperimental-new-constant-interpreter +// RUN: FileCheck -check-prefix=EVAL -input-file=%t.ll %s +// RUN: FileCheck -check-prefix=EVAL-STATIC -input-file=%t.ll %s +// RUN: FileCheck -check-prefix=EVAL-FN -input-file=%t.ll %s +// +// RUN: %clang_cc1 -emit-llvm %s -Dconsteval="" -std=c++2a -triple x86_64-unknown-linux-gnu -o %t.ll -fexperimental-new-constant-interpreter +// RUN: FileCheck -check-prefix=EXPR -input-file=%t.ll %s + // there is two version of symbol checks to ensure // that the symbol we are looking for are correct // EVAL-NOT: @__cxx_global_var_init() From 74dcf0b595d4d230f65a7bba7b0164c019d3c08b Mon Sep 17 00:00:00 2001 From: Abhina Sree Date: Wed, 25 Sep 2024 10:49:45 -0400 Subject: [PATCH 181/212] [SystemZ][z/OS] Open text files in text mode (#109972) This patch continues the work that was started here https://reviews.llvm.org/D99426 to correctly open text files in text mode. --- clang/lib/Driver/OffloadBundler.cpp | 7 ++++--- llvm/tools/llvm-link/llvm-link.cpp | 6 +++--- llvm/tools/llvm-objdump/SourcePrinter.cpp | 3 ++- llvm/tools/llvm-rc/llvm-rc.cpp | 4 ++-- llvm/tools/llvm-readtapi/llvm-readtapi.cpp | 2 +- llvm/tools/llvm-strings/llvm-strings.cpp | 2 +- llvm/utils/split-file/split-file.cpp | 4 ++-- 7 files changed, 15 insertions(+), 13 deletions(-) diff --git a/clang/lib/Driver/OffloadBundler.cpp b/clang/lib/Driver/OffloadBundler.cpp index aaa4fdf03be1e..687a38333e128 100644 --- a/clang/lib/Driver/OffloadBundler.cpp +++ b/clang/lib/Driver/OffloadBundler.cpp @@ -1192,7 +1192,7 @@ Error OffloadBundler::ListBundleIDsInFile( StringRef InputFileName, const OffloadBundlerConfig &BundlerConfig) { // Open Input file. ErrorOr> CodeOrErr = - MemoryBuffer::getFileOrSTDIN(InputFileName); + MemoryBuffer::getFileOrSTDIN(InputFileName, /*IsText=*/true); if (std::error_code EC = CodeOrErr.getError()) return createFileError(InputFileName, EC); @@ -1324,7 +1324,7 @@ Error OffloadBundler::BundleFiles() { InputBuffers.reserve(BundlerConfig.InputFileNames.size()); for (auto &I : BundlerConfig.InputFileNames) { ErrorOr> CodeOrErr = - MemoryBuffer::getFileOrSTDIN(I); + MemoryBuffer::getFileOrSTDIN(I, /*IsText=*/true); if (std::error_code EC = CodeOrErr.getError()) return createFileError(I, EC); InputBuffers.emplace_back(std::move(*CodeOrErr)); @@ -1392,7 +1392,8 @@ Error OffloadBundler::BundleFiles() { Error OffloadBundler::UnbundleFiles() { // Open Input file. 
ErrorOr> CodeOrErr = - MemoryBuffer::getFileOrSTDIN(BundlerConfig.InputFileNames.front()); + MemoryBuffer::getFileOrSTDIN(BundlerConfig.InputFileNames.front(), + /*IsText=*/true); if (std::error_code EC = CodeOrErr.getError()) return createFileError(BundlerConfig.InputFileNames.front(), EC); diff --git a/llvm/tools/llvm-link/llvm-link.cpp b/llvm/tools/llvm-link/llvm-link.cpp index 317b6e20f64cf..34bb6ce30b766 100644 --- a/llvm/tools/llvm-link/llvm-link.cpp +++ b/llvm/tools/llvm-link/llvm-link.cpp @@ -330,8 +330,8 @@ static bool importFunctions(const char *argv0, Module &DestModule) { auto ModuleLoader = [&DestModule](const char *argv0, const std::string &Identifier) { - std::unique_ptr Buffer = - ExitOnErr(errorOrToExpected(MemoryBuffer::getFileOrSTDIN(Identifier))); + std::unique_ptr Buffer = ExitOnErr(errorOrToExpected( + MemoryBuffer::getFileOrSTDIN(Identifier, /*IsText=*/true))); return loadFile(argv0, std::move(Buffer), DestModule.getContext(), false); }; @@ -402,7 +402,7 @@ static bool linkFiles(const char *argv0, LLVMContext &Context, Linker &L, // Similar to some flags, internalization doesn't apply to the first file. bool InternalizeLinkedSymbols = false; for (const auto &File : Files) { - auto BufferOrErr = MemoryBuffer::getFileOrSTDIN(File); + auto BufferOrErr = MemoryBuffer::getFileOrSTDIN(File, /*IsText=*/true); // When we encounter a missing file, make sure we expose its name. if (auto EC = BufferOrErr.getError()) diff --git a/llvm/tools/llvm-objdump/SourcePrinter.cpp b/llvm/tools/llvm-objdump/SourcePrinter.cpp index 7099390f24233..600bd6aa4d51e 100644 --- a/llvm/tools/llvm-objdump/SourcePrinter.cpp +++ b/llvm/tools/llvm-objdump/SourcePrinter.cpp @@ -344,7 +344,8 @@ bool SourcePrinter::cacheSource(const DILineInfo &LineInfo) { if (LineInfo.Source) { Buffer = MemoryBuffer::getMemBuffer(*LineInfo.Source); } else { - auto BufferOrError = MemoryBuffer::getFile(LineInfo.FileName); + auto BufferOrError = + MemoryBuffer::getFile(LineInfo.FileName, /*IsText=*/true); if (!BufferOrError) { if (MissingSources.insert(LineInfo.FileName).second) reportWarning("failed to find source " + LineInfo.FileName, diff --git a/llvm/tools/llvm-rc/llvm-rc.cpp b/llvm/tools/llvm-rc/llvm-rc.cpp index 51214524adeef..4bc9d90095575 100644 --- a/llvm/tools/llvm-rc/llvm-rc.cpp +++ b/llvm/tools/llvm-rc/llvm-rc.cpp @@ -603,7 +603,7 @@ void doRc(std::string Src, std::string Dest, RcOptions &Opts, // Read and tokenize the input file. 
ErrorOr> File = - MemoryBuffer::getFile(PreprocessedFile); + MemoryBuffer::getFile(PreprocessedFile, /*IsText=*/true); if (!File) { fatalError("Error opening file '" + Twine(PreprocessedFile) + "': " + File.getError().message()); @@ -682,7 +682,7 @@ void doCvtres(std::string Src, std::string Dest, std::string TargetTriple) { object::WindowsResourceParser Parser; ErrorOr> BufferOrErr = - MemoryBuffer::getFile(Src); + MemoryBuffer::getFile(Src, /*IsText=*/true); if (!BufferOrErr) fatalError("Error opening file '" + Twine(Src) + "': " + BufferOrErr.getError().message()); diff --git a/llvm/tools/llvm-readtapi/llvm-readtapi.cpp b/llvm/tools/llvm-readtapi/llvm-readtapi.cpp index 1f183975d9481..c287dac4cd239 100644 --- a/llvm/tools/llvm-readtapi/llvm-readtapi.cpp +++ b/llvm/tools/llvm-readtapi/llvm-readtapi.cpp @@ -125,7 +125,7 @@ static std::unique_ptr getInterfaceFile(const StringRef Filename, bool ResetBanner = true) { ExitOnErr.setBanner(TOOLNAME + ": error: '" + Filename.str() + "' "); ErrorOr> BufferOrErr = - MemoryBuffer::getFile(Filename); + MemoryBuffer::getFile(Filename, /*IsText=*/true); if (BufferOrErr.getError()) ExitOnErr(errorCodeToError(BufferOrErr.getError())); auto Buffer = std::move(*BufferOrErr); diff --git a/llvm/tools/llvm-strings/llvm-strings.cpp b/llvm/tools/llvm-strings/llvm-strings.cpp index 8642be3127fed..d4305096b60a0 100644 --- a/llvm/tools/llvm-strings/llvm-strings.cpp +++ b/llvm/tools/llvm-strings/llvm-strings.cpp @@ -173,7 +173,7 @@ int main(int argc, char **argv) { for (const auto &File : InputFileNames) { ErrorOr> Buffer = - MemoryBuffer::getFileOrSTDIN(File); + MemoryBuffer::getFileOrSTDIN(File, /*IsText=*/true); if (std::error_code EC = Buffer.getError()) errs() << File << ": " << EC.message() << '\n'; else diff --git a/llvm/utils/split-file/split-file.cpp b/llvm/utils/split-file/split-file.cpp index 2ad04d6e42f2b..672877adaba31 100644 --- a/llvm/utils/split-file/split-file.cpp +++ b/llvm/utils/split-file/split-file.cpp @@ -123,7 +123,7 @@ static int handle(MemoryBuffer &inputBuf, StringRef input) { if (ec) fatal(input, ec.message()); auto f = std::make_unique(partPath.str(), ec, - llvm::sys::fs::OF_None); + llvm::sys::fs::OF_Text); if (!f) fatal(input, ec.message()); @@ -156,7 +156,7 @@ int main(int argc, const char **argv) { if (output.empty()) fatal("", "output directory is not specified"); ErrorOr> bufferOrErr = - MemoryBuffer::getFileOrSTDIN(input); + MemoryBuffer::getFileOrSTDIN(input, /*IsText=*/true); if (std::error_code ec = bufferOrErr.getError()) fatal(input, ec.message()); From 9a0e281e8ccfc57ed5a5754d320b710efad6d303 Mon Sep 17 00:00:00 2001 From: Dmitry Chernenkov Date: Wed, 25 Sep 2024 14:49:50 +0000 Subject: [PATCH 182/212] Revert "[NVVM] Upgrade nvvm.ptr.* intrinics to addrspace cast (#109710)" This reverts commit 36757613b73908f055674a8df0b51cc00aa04373. 
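For reference, this restores the `llvm.nvvm.ptr.*` address space conversion
intrinsics (and their documentation) that the reverted change had
auto-upgraded to `addrspacecast`. A minimal sketch of the restored usage,
following the signatures documented below (the value names are illustrative):

    %g  = call ptr addrspace(1) @llvm.nvvm.ptr.gen.to.global.p1.p0(ptr %p)
    %p2 = call ptr @llvm.nvvm.ptr.global.to.gen.p0.p1(ptr addrspace(1) %g)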
--- llvm/docs/NVPTXUsage.rst | 63 +++++++++++++ llvm/docs/ReleaseNotes.rst | 12 --- llvm/include/llvm/IR/IntrinsicsNVVM.td | 50 +++++++--- llvm/lib/IR/AutoUpgrade.cpp | 19 ---- llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp | 58 ++++++------ llvm/lib/Target/NVPTX/NVPTXInstrInfo.td | 4 + llvm/lib/Target/NVPTX/NVPTXIntrinsics.td | 92 +++++++++++++------ .../Assembler/auto_upgrade_nvvm_intrinsics.ll | 35 ------- llvm/test/CodeGen/NVPTX/intrin-nocapture.ll | 21 +++++ llvm/test/DebugInfo/NVPTX/debug-info.ll | 20 ++-- 10 files changed, 228 insertions(+), 146 deletions(-) create mode 100644 llvm/test/CodeGen/NVPTX/intrin-nocapture.ll diff --git a/llvm/docs/NVPTXUsage.rst b/llvm/docs/NVPTXUsage.rst index 8b0b05c0ea424..3a566bbac3623 100644 --- a/llvm/docs/NVPTXUsage.rst +++ b/llvm/docs/NVPTXUsage.rst @@ -127,6 +127,69 @@ Example: 64-bit PTX for CUDA Driver API: ``nvptx64-nvidia-cuda`` NVPTX Intrinsics ================ +Address Space Conversion +------------------------ + +'``llvm.nvvm.ptr.*.to.gen``' Intrinsics +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +These are overloaded intrinsics. You can use these on any pointer types. + +.. code-block:: llvm + + declare ptr @llvm.nvvm.ptr.global.to.gen.p0.p1(ptr addrspace(1)) + declare ptr @llvm.nvvm.ptr.shared.to.gen.p0.p3(ptr addrspace(3)) + declare ptr @llvm.nvvm.ptr.constant.to.gen.p0.p4(ptr addrspace(4)) + declare ptr @llvm.nvvm.ptr.local.to.gen.p0.p5(ptr addrspace(5)) + +Overview: +""""""""" + +The '``llvm.nvvm.ptr.*.to.gen``' intrinsics convert a pointer in a non-generic +address space to a generic address space pointer. + +Semantics: +"""""""""" + +These intrinsics modify the pointer value to be a valid generic address space +pointer. + + +'``llvm.nvvm.ptr.gen.to.*``' Intrinsics +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +These are overloaded intrinsics. You can use these on any pointer types. + +.. code-block:: llvm + + declare ptr addrspace(1) @llvm.nvvm.ptr.gen.to.global.p1.p0(ptr) + declare ptr addrspace(3) @llvm.nvvm.ptr.gen.to.shared.p3.p0(ptr) + declare ptr addrspace(4) @llvm.nvvm.ptr.gen.to.constant.p4.p0(ptr) + declare ptr addrspace(5) @llvm.nvvm.ptr.gen.to.local.p5.p0(ptr) + +Overview: +""""""""" + +The '``llvm.nvvm.ptr.gen.to.*``' intrinsics convert a pointer in the generic +address space to a pointer in the target address space. Note that these +intrinsics are only useful if the address space of the target address space of +the pointer is known. It is not legal to use address space conversion +intrinsics to convert a pointer from one non-generic address space to another +non-generic address space. + +Semantics: +"""""""""" + +These intrinsics modify the pointer value to be a valid pointer in the target +non-generic address space. 
+ + Reading PTX Special Registers ----------------------------- diff --git a/llvm/docs/ReleaseNotes.rst b/llvm/docs/ReleaseNotes.rst index 9bf838c39643d..0784d93f18da8 100644 --- a/llvm/docs/ReleaseNotes.rst +++ b/llvm/docs/ReleaseNotes.rst @@ -69,18 +69,6 @@ Changes to the LLVM IR * ``llvm.nvvm.rotate.right.b64`` * ``llvm.nvvm.rotate.b64`` -* Remove the following intrinsics which can be replaced with an - ``addrspacecast``: - - * ``llvm.nvvm.ptr.gen.to.global`` - * ``llvm.nvvm.ptr.gen.to.shared`` - * ``llvm.nvvm.ptr.gen.to.constant`` - * ``llvm.nvvm.ptr.gen.to.local`` - * ``llvm.nvvm.ptr.global.to.gen`` - * ``llvm.nvvm.ptr.shared.to.gen`` - * ``llvm.nvvm.ptr.constant.to.gen`` - * ``llvm.nvvm.ptr.local.to.gen`` - Changes to LLVM infrastructure ------------------------------ diff --git a/llvm/include/llvm/IR/IntrinsicsNVVM.td b/llvm/include/llvm/IR/IntrinsicsNVVM.td index 7b8ffe417fccd..aa5294f5f9c90 100644 --- a/llvm/include/llvm/IR/IntrinsicsNVVM.td +++ b/llvm/include/llvm/IR/IntrinsicsNVVM.td @@ -30,18 +30,10 @@ // * llvm.nvvm.max.ui --> select(x ule y, x, y) // * llvm.nvvm.max.ull --> ibid. // * llvm.nvvm.h2f --> llvm.convert.to.fp16.f32 -// * llvm.nvvm.bitcast.f2i --> bitcast -// * llvm.nvvm.bitcast.i2f --> ibid. -// * llvm.nvvm.bitcast.d2ll --> ibid. -// * llvm.nvvm.bitcast.ll2d --> ibid. -// * llvm.nvvm.ptr.gen.to.global --> addrspacecast -// * llvm.nvvm.ptr.gen.to.shared --> ibid. -// * llvm.nvvm.ptr.gen.to.constant --> ibid. -// * llvm.nvvm.ptr.gen.to.local --> ibid. -// * llvm.nvvm.ptr.global.to.gen --> ibid. -// * llvm.nvvm.ptr.shared.to.gen --> ibid. -// * llvm.nvvm.ptr.constant.to.gen --> ibid. -// * llvm.nvvm.ptr.local.to.gen --> ibid. +// * llvm.nvvm.bitcast.f2i --> bitcast +// * llvm.nvvm.bitcast.i2f --> ibid. +// * llvm.nvvm.bitcast.d2ll --> ibid. +// * llvm.nvvm.bitcast.ll2d --> ibid. def llvm_global_ptr_ty : LLVMQualPointerType<1>; // (global)ptr def llvm_shared_ptr_ty : LLVMQualPointerType<3>; // (shared)ptr @@ -1610,6 +1602,40 @@ def int_nvvm_ldg_global_p : Intrinsic<[llvm_anyptr_ty], [IntrReadMem, IntrArgMemOnly, IntrNoCallback, IntrWillReturn, NoCapture>], "llvm.nvvm.ldg.global.p">; +// Use for generic pointers +// - These intrinsics are used to convert address spaces. +// - The input pointer and output pointer must have the same type, except for +// the address-space. (This restriction is not enforced here as there is +// currently no way to describe it). +// - This complements the llvm bitcast, which can be used to cast one type +// of pointer to another type of pointer, while the address space remains +// the same. 
+def int_nvvm_ptr_local_to_gen: DefaultAttrsIntrinsic<[llvm_anyptr_ty], + [llvm_anyptr_ty], [IntrNoMem, IntrSpeculatable], + "llvm.nvvm.ptr.local.to.gen">; +def int_nvvm_ptr_shared_to_gen: DefaultAttrsIntrinsic<[llvm_anyptr_ty], + [llvm_anyptr_ty], [IntrNoMem, IntrSpeculatable], + "llvm.nvvm.ptr.shared.to.gen">; +def int_nvvm_ptr_global_to_gen: DefaultAttrsIntrinsic<[llvm_anyptr_ty], + [llvm_anyptr_ty], [IntrNoMem, IntrSpeculatable], + "llvm.nvvm.ptr.global.to.gen">; +def int_nvvm_ptr_constant_to_gen: DefaultAttrsIntrinsic<[llvm_anyptr_ty], + [llvm_anyptr_ty], [IntrNoMem, IntrSpeculatable], + "llvm.nvvm.ptr.constant.to.gen">; + +def int_nvvm_ptr_gen_to_global: DefaultAttrsIntrinsic<[llvm_anyptr_ty], + [llvm_anyptr_ty], [IntrNoMem, IntrSpeculatable], + "llvm.nvvm.ptr.gen.to.global">; +def int_nvvm_ptr_gen_to_shared: DefaultAttrsIntrinsic<[llvm_anyptr_ty], + [llvm_anyptr_ty], [IntrNoMem, IntrSpeculatable], + "llvm.nvvm.ptr.gen.to.shared">; +def int_nvvm_ptr_gen_to_local: DefaultAttrsIntrinsic<[llvm_anyptr_ty], + [llvm_anyptr_ty], [IntrNoMem, IntrSpeculatable], + "llvm.nvvm.ptr.gen.to.local">; +def int_nvvm_ptr_gen_to_constant: DefaultAttrsIntrinsic<[llvm_anyptr_ty], + [llvm_anyptr_ty], [IntrNoMem, IntrSpeculatable], + "llvm.nvvm.ptr.gen.to.constant">; + // Used in nvvm internally to help address space opt and ptx code generation // This is for params that are passed to kernel functions by pointer by-val. def int_nvvm_ptr_gen_to_param: Intrinsic<[llvm_anyptr_ty], diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp index b84258398c193..3390d651d6c69 100644 --- a/llvm/lib/IR/AutoUpgrade.cpp +++ b/llvm/lib/IR/AutoUpgrade.cpp @@ -1275,16 +1275,6 @@ static bool upgradeIntrinsicFunction1(Function *F, Function *&NewFn, else if (Name.consume_front("rotate.")) // nvvm.rotate.{b32,b64,right.b64} Expand = Name == "b32" || Name == "b64" || Name == "right.b64"; - else if (Name.consume_front("ptr.gen.to.")) - // nvvm.ptr.gen.to.{local,shared,global,constant} - Expand = Name.starts_with("local") || Name.starts_with("shared") || - Name.starts_with("global") || Name.starts_with("constant"); - else if (Name.consume_front("ptr.")) - // nvvm.ptr.{local,shared,global,constant}.to.gen - Expand = - (Name.consume_front("local") || Name.consume_front("shared") || - Name.consume_front("global") || Name.consume_front("constant")) && - Name.starts_with(".to.gen"); else Expand = false; @@ -2348,15 +2338,6 @@ static Value *upgradeNVVMIntrinsicCall(StringRef Name, CallBase *CI, Value *ZExtShiftAmt = Builder.CreateZExt(CI->getOperand(1), Int64Ty); Rep = Builder.CreateIntrinsic(Int64Ty, Intrinsic::fshr, {Arg, Arg, ZExtShiftAmt}); - } else if ((Name.consume_front("ptr.gen.to.") && - (Name.starts_with("local") || Name.starts_with("shared") || - Name.starts_with("global") || Name.starts_with("constant"))) || - (Name.consume_front("ptr.") && - (Name.consume_front("local") || Name.consume_front("shared") || - Name.consume_front("global") || - Name.consume_front("constant")) && - Name.starts_with(".to.gen"))) { - Rep = Builder.CreateAddrSpaceCast(CI->getArgOperand(0), CI->getType()); } else { Intrinsic::ID IID = shouldUpgradeNVPTXBF16Intrinsic(Name); if (IID != Intrinsic::not_intrinsic && diff --git a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp index 7f942de74bdcc..56c96ea943b89 100644 --- a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp @@ -1109,21 +1109,11 @@ void NVPTXDAGToDAGISel::SelectAddrSpaceCast(SDNode *N) { 
AddrSpaceCastSDNode *CastN = cast(N); unsigned SrcAddrSpace = CastN->getSrcAddressSpace(); unsigned DstAddrSpace = CastN->getDestAddressSpace(); - SDLoc DL(N); assert(SrcAddrSpace != DstAddrSpace && "addrspacecast must be between different address spaces"); if (DstAddrSpace == ADDRESS_SPACE_GENERIC) { // Specific to generic - - if (TM.is64Bit() && TM.getPointerSizeInBits(SrcAddrSpace) == 32) { - SDValue CvtNone = - CurDAG->getTargetConstant(NVPTX::PTXCvtMode::NONE, DL, MVT::i32); - SDNode *Cvt = CurDAG->getMachineNode(NVPTX::CVT_u64_u32, DL, MVT::i64, - Src, CvtNone); - Src = SDValue(Cvt, 0); - } - unsigned Opc; switch (SrcAddrSpace) { default: report_fatal_error("Bad address space in addrspacecast"); @@ -1131,16 +1121,26 @@ void NVPTXDAGToDAGISel::SelectAddrSpaceCast(SDNode *N) { Opc = TM.is64Bit() ? NVPTX::cvta_global_64 : NVPTX::cvta_global; break; case ADDRESS_SPACE_SHARED: - Opc = TM.is64Bit() ? NVPTX::cvta_shared_64 : NVPTX::cvta_shared; + Opc = TM.is64Bit() ? (TM.getPointerSizeInBits(SrcAddrSpace) == 32 + ? NVPTX::cvta_shared_6432 + : NVPTX::cvta_shared_64) + : NVPTX::cvta_shared; break; case ADDRESS_SPACE_CONST: - Opc = TM.is64Bit() ? NVPTX::cvta_const_64 : NVPTX::cvta_const; + Opc = TM.is64Bit() ? (TM.getPointerSizeInBits(SrcAddrSpace) == 32 + ? NVPTX::cvta_const_6432 + : NVPTX::cvta_const_64) + : NVPTX::cvta_const; break; case ADDRESS_SPACE_LOCAL: - Opc = TM.is64Bit() ? NVPTX::cvta_local_64 : NVPTX::cvta_local; + Opc = TM.is64Bit() ? (TM.getPointerSizeInBits(SrcAddrSpace) == 32 + ? NVPTX::cvta_local_6432 + : NVPTX::cvta_local_64) + : NVPTX::cvta_local; break; } - ReplaceNode(N, CurDAG->getMachineNode(Opc, DL, N->getValueType(0), Src)); + ReplaceNode(N, CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0), + Src)); return; } else { // Generic to specific @@ -1153,28 +1153,30 @@ void NVPTXDAGToDAGISel::SelectAddrSpaceCast(SDNode *N) { Opc = TM.is64Bit() ? NVPTX::cvta_to_global_64 : NVPTX::cvta_to_global; break; case ADDRESS_SPACE_SHARED: - Opc = TM.is64Bit() ? NVPTX::cvta_to_shared_64 : NVPTX::cvta_to_shared; + Opc = TM.is64Bit() ? (TM.getPointerSizeInBits(DstAddrSpace) == 32 + ? NVPTX::cvta_to_shared_3264 + : NVPTX::cvta_to_shared_64) + : NVPTX::cvta_to_shared; break; case ADDRESS_SPACE_CONST: - Opc = TM.is64Bit() ? NVPTX::cvta_to_const_64 : NVPTX::cvta_to_const; + Opc = TM.is64Bit() ? (TM.getPointerSizeInBits(DstAddrSpace) == 32 + ? NVPTX::cvta_to_const_3264 + : NVPTX::cvta_to_const_64) + : NVPTX::cvta_to_const; break; case ADDRESS_SPACE_LOCAL: - Opc = TM.is64Bit() ? NVPTX::cvta_to_local_64 : NVPTX::cvta_to_local; + Opc = TM.is64Bit() ? (TM.getPointerSizeInBits(DstAddrSpace) == 32 + ? NVPTX::cvta_to_local_3264 + : NVPTX::cvta_to_local_64) + : NVPTX::cvta_to_local; break; case ADDRESS_SPACE_PARAM: - Opc = TM.is64Bit() ? NVPTX::IMOV64rr : NVPTX::IMOV32rr; + Opc = TM.is64Bit() ? 
NVPTX::nvvm_ptr_gen_to_param_64 + : NVPTX::nvvm_ptr_gen_to_param; break; } - - SDNode *CVTA = CurDAG->getMachineNode(Opc, DL, N->getValueType(0), Src); - if (TM.is64Bit() && TM.getPointerSizeInBits(DstAddrSpace) == 32) { - SDValue CvtNone = - CurDAG->getTargetConstant(NVPTX::PTXCvtMode::NONE, DL, MVT::i32); - CVTA = CurDAG->getMachineNode(NVPTX::CVT_u32_u64, DL, MVT::i32, - SDValue(CVTA, 0), CvtNone); - } - - ReplaceNode(N, CVTA); + ReplaceNode(N, CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0), + Src)); return; } } diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td index c3a8a774673f2..f6bbf4c2ffc02 100644 --- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td +++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td @@ -174,6 +174,10 @@ def hasSM90a : Predicate<"Subtarget->getFullSmVersion() == 901">; def hasSHFL : Predicate<"!(Subtarget->getSmVersion() >= 70" "&& Subtarget->getPTXVersion() >= 64)">; +def useShortPtrLocal : Predicate<"TM.is64Bit() && TM.getPointerSizeInBits(ADDRESS_SPACE_LOCAL) == 32">; +def useShortPtrShared : Predicate<"TM.is64Bit() && TM.getPointerSizeInBits(ADDRESS_SPACE_SHARED) == 32">; +def useShortPtrConst : Predicate<"TM.is64Bit() && TM.getPointerSizeInBits(ADDRESS_SPACE_CONST) == 32">; + def useFP16Math: Predicate<"Subtarget->allowFP16Math()">; def hasBF16Math: Predicate<"Subtarget->hasBF16Math()">; diff --git a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td index 042b0965ea33f..2688cfbe5e824 100644 --- a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td +++ b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td @@ -2537,45 +2537,59 @@ defm INT_PTX_LDG_G_v4f32_ELE : VLDG_G_ELE_V4<"v4.f32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Float32Regs>; -multiclass NG_TO_G { +multiclass NG_TO_G { def "" : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src), - "cvta." # Str # ".u32 \t$result, $src;", []>; + !strconcat("cvta.", Str, ".u32 \t$result, $src;"), + [(set Int32Regs:$result, (Intrin Int32Regs:$src))]>; def _64 : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src), - "cvta." # Str # ".u64 \t$result, $src;", []>; + !strconcat("cvta.", Str, ".u64 \t$result, $src;"), + [(set Int64Regs:$result, (Intrin Int64Regs:$src))]>; + def _6432 : NVPTXInst<(outs Int64Regs:$result), (ins Int32Regs:$src), + "{{ .reg .b64 %tmp;\n\t" + #" cvt.u64.u32 \t%tmp, $src;\n\t" + #" cvta." # Str # ".u64 \t$result, %tmp; }}", + [(set Int64Regs:$result, (Intrin Int32Regs:$src))]>, + Requires<[ShortPtr]>; } -multiclass G_TO_NG { +multiclass G_TO_NG { def "" : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src), - "cvta.to." # Str # ".u32 \t$result, $src;", []>; + !strconcat("cvta.to.", Str, ".u32 \t$result, $src;"), + [(set Int32Regs:$result, (Intrin Int32Regs:$src))]>; def _64 : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src), - "cvta.to." 
# Str # ".u64 \t$result, $src;", []>; -} - -defm cvta_local : NG_TO_G<"local">; -defm cvta_shared : NG_TO_G<"shared">; -defm cvta_global : NG_TO_G<"global">; -defm cvta_const : NG_TO_G<"const">; - -defm cvta_to_local : G_TO_NG<"local">; -defm cvta_to_shared : G_TO_NG<"shared">; -defm cvta_to_global : G_TO_NG<"global">; -defm cvta_to_const : G_TO_NG<"const">; - -// nvvm.ptr.param.to.gen -defm cvta_param : NG_TO_G<"param">; - -def : Pat<(int_nvvm_ptr_param_to_gen Int32Regs:$src), - (cvta_param Int32Regs:$src)>; - -def : Pat<(int_nvvm_ptr_param_to_gen Int64Regs:$src), - (cvta_param_64 Int64Regs:$src)>; + !strconcat("cvta.to.", Str, ".u64 \t$result, $src;"), + [(set Int64Regs:$result, (Intrin Int64Regs:$src))]>; + def _3264 : NVPTXInst<(outs Int32Regs:$result), (ins Int64Regs:$src), + "{{ .reg .b64 %tmp;\n\t" + #" cvta.to." # Str # ".u64 \t%tmp, $src;\n\t" + #" cvt.u32.u64 \t$result, %tmp; }}", + [(set Int32Regs:$result, (Intrin Int64Regs:$src))]>, + Requires<[ShortPtr]>; +} + +defm cvta_local : NG_TO_G<"local", int_nvvm_ptr_local_to_gen, useShortPtrLocal>; +defm cvta_shared : NG_TO_G<"shared", int_nvvm_ptr_shared_to_gen, useShortPtrShared>; +defm cvta_global : NG_TO_G<"global", int_nvvm_ptr_global_to_gen, False>; +defm cvta_const : NG_TO_G<"const", int_nvvm_ptr_constant_to_gen, useShortPtrConst>; +defm cvta_param : NG_TO_G<"param", int_nvvm_ptr_param_to_gen, False>; + +defm cvta_to_local : G_TO_NG<"local", int_nvvm_ptr_gen_to_local, useShortPtrLocal>; +defm cvta_to_shared : G_TO_NG<"shared", int_nvvm_ptr_gen_to_shared, useShortPtrShared>; +defm cvta_to_global : G_TO_NG<"global", int_nvvm_ptr_gen_to_global, False>; +defm cvta_to_const : G_TO_NG<"const", int_nvvm_ptr_gen_to_constant, useShortPtrConst>; // nvvm.ptr.gen.to.param -def : Pat<(int_nvvm_ptr_gen_to_param Int32Regs:$src), - (IMOV32rr Int32Regs:$src)>; +def nvvm_ptr_gen_to_param : NVPTXInst<(outs Int32Regs:$result), + (ins Int32Regs:$src), + "mov.u32 \t$result, $src;", + [(set Int32Regs:$result, + (int_nvvm_ptr_gen_to_param Int32Regs:$src))]>; +def nvvm_ptr_gen_to_param_64 : NVPTXInst<(outs Int64Regs:$result), + (ins Int64Regs:$src), + "mov.u64 \t$result, $src;", + [(set Int64Regs:$result, + (int_nvvm_ptr_gen_to_param Int64Regs:$src))]>; -def : Pat<(int_nvvm_ptr_gen_to_param Int64Regs:$src), - (IMOV64rr Int64Regs:$src)>; // nvvm.move intrinsicc def nvvm_move_i16 : NVPTXInst<(outs Int16Regs:$r), (ins Int16Regs:$s), @@ -2618,6 +2632,24 @@ def nvvm_move_sym64 : NVPTXInst<(outs Int64Regs:$r), (ins imem:$s), [(set Int64Regs:$r, (int_nvvm_move_ptr texternalsym:$s))]>;*/ + +// MoveParam %r1, param +// ptr_local_to_gen %r2, %r1 +// ptr_gen_to_local %r3, %r2 +// -> +// mov %r1, param + +// @TODO: Revisit this. There is a type +// contradiction between iPTRAny and iPTR for the addr defs, so the move_sym +// instructions are not currently defined. However, we can use the ptr +// variants and the asm printer will do the right thing. 
+def : Pat<(i64 (int_nvvm_ptr_gen_to_local (int_nvvm_ptr_local_to_gen + (MoveParam texternalsym:$src)))), + (nvvm_move_ptr64 texternalsym:$src)>; +def : Pat<(i32 (int_nvvm_ptr_gen_to_local (int_nvvm_ptr_local_to_gen + (MoveParam texternalsym:$src)))), + (nvvm_move_ptr32 texternalsym:$src)>; + def texsurf_handles : NVPTXInst<(outs Int64Regs:$result), (ins imem:$src), "mov.u64 \t$result, $src;", []>; diff --git a/llvm/test/Assembler/auto_upgrade_nvvm_intrinsics.ll b/llvm/test/Assembler/auto_upgrade_nvvm_intrinsics.ll index 584c0ef7cfeb7..43ac246055da7 100644 --- a/llvm/test/Assembler/auto_upgrade_nvvm_intrinsics.ll +++ b/llvm/test/Assembler/auto_upgrade_nvvm_intrinsics.ll @@ -35,15 +35,6 @@ declare i32 @llvm.nvvm.rotate.b32(i32, i32) declare i64 @llvm.nvvm.rotate.right.b64(i64, i32) declare i64 @llvm.nvvm.rotate.b64(i64, i32) -declare ptr addrspace(1) @llvm.nvvm.ptr.gen.to.global.p1.p0(ptr) -declare ptr addrspace(3) @llvm.nvvm.ptr.gen.to.shared.p3.p0(ptr) -declare ptr addrspace(4) @llvm.nvvm.ptr.gen.to.constant.p4.p0(ptr) -declare ptr addrspace(5) @llvm.nvvm.ptr.gen.to.local.p5.p0(ptr) -declare ptr @llvm.nvvm.ptr.global.to.gen.p0.p1(ptr addrspace(1)) -declare ptr @llvm.nvvm.ptr.shared.to.gen.p0.p3(ptr addrspace(3)) -declare ptr @llvm.nvvm.ptr.constant.to.gen.p0.p4(ptr addrspace(4)) -declare ptr @llvm.nvvm.ptr.local.to.gen.p0.p5(ptr addrspace(5)) - ; CHECK-LABEL: @simple_upgrade define void @simple_upgrade(i32 %a, i64 %b, i16 %c) { ; CHECK: call i32 @llvm.bitreverse.i32(i32 %a) @@ -165,29 +156,3 @@ define void @rotate(i32 %a, i64 %b) { %r3 = call i64 @llvm.nvvm.rotate.b64(i64 %b, i32 8) ret void } - -; CHECK-LABEL: @addrspacecast -define void @addrspacecast(ptr %p0) { -; CHECK: %1 = addrspacecast ptr %p0 to ptr addrspace(1) -; CHECK: %2 = addrspacecast ptr addrspace(1) %1 to ptr -; CHECK: %3 = addrspacecast ptr %2 to ptr addrspace(3) -; CHECK: %4 = addrspacecast ptr addrspace(3) %3 to ptr -; CHECK: %5 = addrspacecast ptr %4 to ptr addrspace(4) -; CHECK: %6 = addrspacecast ptr addrspace(4) %5 to ptr -; CHECK: %7 = addrspacecast ptr %6 to ptr addrspace(5) -; CHECK: %8 = addrspacecast ptr addrspace(5) %7 to ptr -; - %p1 = call ptr addrspace(1) @llvm.nvvm.ptr.gen.to.global.p1.p0(ptr %p0) - %p2 = call ptr @llvm.nvvm.ptr.global.to.gen.p0.p1(ptr addrspace(1) %p1) - - %p3 = call ptr addrspace(3) @llvm.nvvm.ptr.gen.to.shared.p3.p0(ptr %p2) - %p4 = call ptr @llvm.nvvm.ptr.shared.to.gen.p0.p3(ptr addrspace(3) %p3) - - %p5 = call ptr addrspace(4) @llvm.nvvm.ptr.gen.to.constant.p4.p0(ptr %p4) - %p6 = call ptr @llvm.nvvm.ptr.constant.to.gen.p0.p4(ptr addrspace(4) %p5) - - %p7 = call ptr addrspace(5) @llvm.nvvm.ptr.gen.to.local.p5.p0(ptr %p6) - %p8 = call ptr @llvm.nvvm.ptr.local.to.gen.p0.p5(ptr addrspace(5) %p7) - - ret void -} diff --git a/llvm/test/CodeGen/NVPTX/intrin-nocapture.ll b/llvm/test/CodeGen/NVPTX/intrin-nocapture.ll new file mode 100644 index 0000000000000..040bbde13800c --- /dev/null +++ b/llvm/test/CodeGen/NVPTX/intrin-nocapture.ll @@ -0,0 +1,21 @@ +; RUN: opt < %s -O3 -S | FileCheck %s + +; Address space intrinsics were erroneously marked NoCapture, leading to bad +; optimizations (such as the store below being eliminated as dead code). This +; test makes sure we don't regress. 
+ +declare void @foo(ptr addrspace(1)) + +declare ptr addrspace(1) @llvm.nvvm.ptr.gen.to.global.p1.p0(ptr) + +; CHECK: @bar +define void @bar() { + %t1 = alloca i32 +; CHECK: call ptr addrspace(1) @llvm.nvvm.ptr.gen.to.global.p1.p0(ptr nonnull %t1) +; CHECK-NEXT: store i32 10, ptr %t1 + %t2 = call ptr addrspace(1) @llvm.nvvm.ptr.gen.to.global.p1.p0(ptr %t1) + store i32 10, ptr %t1 + call void @foo(ptr addrspace(1) %t2) + ret void +} + diff --git a/llvm/test/DebugInfo/NVPTX/debug-info.ll b/llvm/test/DebugInfo/NVPTX/debug-info.ll index 922a420820f46..9948925db57c9 100644 --- a/llvm/test/DebugInfo/NVPTX/debug-info.ll +++ b/llvm/test/DebugInfo/NVPTX/debug-info.ll @@ -25,10 +25,6 @@ ; CHECK-DAG: .reg .b64 %rd<8>; ; CHECK: .loc [[DEBUG_INFO_CU:[0-9]+]] 5 0 ; CHECK: ld.param.u32 %r{{.+}}, [{{.+}}]; -; CHECK: ld.param.u64 %rd{{.+}}, [{{.+}}]; -; CHECK: cvta.to.global.u64 %rd{{.+}}, %rd{{.+}}; -; CHECK: ld.param.u64 %rd{{.+}}, [{{.+}}]; -; CHECK: cvta.to.global.u64 %rd{{.+}}, %rd{{.+}}; ; CHECK: .loc [[BUILTUIN_VARS_H:[0-9]+]] 78 180 ; CHECK: mov.u32 %r{{.+}}, %ctaid.x; ; CHECK: .loc [[BUILTUIN_VARS_H]] 89 180 @@ -42,6 +38,10 @@ ; CHECK: .loc [[DEBUG_INFO_CU]] 7 7 ; CHECK: @%p{{.+}} bra [[BB:\$L__.+]]; ; CHECK: ld.param.f32 %f{{.+}}, [{{.+}}]; +; CHECK: ld.param.u64 %rd{{.+}}, [{{.+}}]; +; CHECK: cvta.to.global.u64 %rd{{.+}}, %rd{{.+}}; +; CHECK: ld.param.u64 %rd{{.+}}, [{{.+}}]; +; CHECK: cvta.to.global.u64 %rd{{.+}}, %rd{{.+}}; ; CHECK: .loc [[DEBUG_INFO_CU]] 8 13 ; CHECK: mul.wide.u32 %rd{{.+}}, %r{{.+}}, 4; ; CHECK: add.s64 %rd{{.+}}, %rd{{.+}}, %rd{{.+}}; @@ -2661,22 +2661,22 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b32 4579 // DW_AT_type ; CHECK-NEXT:.b8 25 // Abbrev [25] 0x8aa:0x18 DW_TAG_inlined_subroutine ; CHECK-NEXT:.b32 707 // DW_AT_abstract_origin -; CHECK-NEXT:.b64 $L__tmp1 // DW_AT_low_pc -; CHECK-NEXT:.b64 $L__tmp2 // DW_AT_high_pc +; CHECK-NEXT:.b64 $L__tmp0 // DW_AT_low_pc +; CHECK-NEXT:.b64 $L__tmp1 // DW_AT_high_pc ; CHECK-NEXT:.b8 1 // DW_AT_call_file ; CHECK-NEXT:.b8 6 // DW_AT_call_line ; CHECK-NEXT:.b8 11 // DW_AT_call_column ; CHECK-NEXT:.b8 25 // Abbrev [25] 0x8c2:0x18 DW_TAG_inlined_subroutine ; CHECK-NEXT:.b32 1466 // DW_AT_abstract_origin -; CHECK-NEXT:.b64 $L__tmp2 // DW_AT_low_pc -; CHECK-NEXT:.b64 $L__tmp3 // DW_AT_high_pc +; CHECK-NEXT:.b64 $L__tmp1 // DW_AT_low_pc +; CHECK-NEXT:.b64 $L__tmp2 // DW_AT_high_pc ; CHECK-NEXT:.b8 1 // DW_AT_call_file ; CHECK-NEXT:.b8 6 // DW_AT_call_line ; CHECK-NEXT:.b8 24 // DW_AT_call_column ; CHECK-NEXT:.b8 25 // Abbrev [25] 0x8da:0x18 DW_TAG_inlined_subroutine ; CHECK-NEXT:.b32 2060 // DW_AT_abstract_origin -; CHECK-NEXT:.b64 $L__tmp3 // DW_AT_low_pc -; CHECK-NEXT:.b64 $L__tmp4 // DW_AT_high_pc +; CHECK-NEXT:.b64 $L__tmp2 // DW_AT_low_pc +; CHECK-NEXT:.b64 $L__tmp3 // DW_AT_high_pc ; CHECK-NEXT:.b8 1 // DW_AT_call_file ; CHECK-NEXT:.b8 6 // DW_AT_call_line ; CHECK-NEXT:.b8 37 // DW_AT_call_column From 4cb61c20ef38c6020389a15e739bac929b15425a Mon Sep 17 00:00:00 2001 From: Dmitry Chernenkov Date: Wed, 25 Sep 2024 14:50:04 +0000 Subject: [PATCH 183/212] Revert "[NVPTX] deprecate nvvm.rotate.* intrinsics, cleanup funnel-shift handling (#107655)" This reverts commit 9ac00b85e05d21be658d6aa0c91cbe05bb5dbde2. 
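For reference, this restores the `llvm.nvvm.rotate.{b32,b64,right.b64}`
intrinsics that the reverted change had auto-upgraded to funnel shifts
(`llvm.fshl`/`llvm.fshr`). A minimal sketch of the restored usage (value
names are illustrative):

    %r1 = call i32 @llvm.nvvm.rotate.b32(i32 %a, i32 6)
    %r2 = call i64 @llvm.nvvm.rotate.right.b64(i64 %b, i32 8)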
--- llvm/docs/ReleaseNotes.rst | 6 - llvm/include/llvm/IR/IntrinsicsNVVM.td | 16 + llvm/lib/IR/AutoUpgrade.cpp | 184 ++++---- llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp | 21 +- llvm/lib/Target/NVPTX/NVPTXInstrInfo.td | 197 ++++++-- llvm/lib/Target/NVPTX/NVPTXIntrinsics.td | 129 +++++- .../Assembler/auto_upgrade_nvvm_intrinsics.ll | 18 +- llvm/test/CodeGen/NVPTX/rotate.ll | 433 +++++++----------- llvm/test/CodeGen/NVPTX/rotate_64.ll | 33 +- 9 files changed, 574 insertions(+), 463 deletions(-) diff --git a/llvm/docs/ReleaseNotes.rst b/llvm/docs/ReleaseNotes.rst index 0784d93f18da8..05f5bd65fc5f6 100644 --- a/llvm/docs/ReleaseNotes.rst +++ b/llvm/docs/ReleaseNotes.rst @@ -63,12 +63,6 @@ Changes to the LLVM IR * ``llvm.nvvm.bitcast.d2ll`` * ``llvm.nvvm.bitcast.ll2d`` -* Remove the following intrinsics which can be replaced with a funnel-shift: - - * ``llvm.nvvm.rotate.b32`` - * ``llvm.nvvm.rotate.right.b64`` - * ``llvm.nvvm.rotate.b64`` - Changes to LLVM infrastructure ------------------------------ diff --git a/llvm/include/llvm/IR/IntrinsicsNVVM.td b/llvm/include/llvm/IR/IntrinsicsNVVM.td index aa5294f5f9c90..737dd6092e218 100644 --- a/llvm/include/llvm/IR/IntrinsicsNVVM.td +++ b/llvm/include/llvm/IR/IntrinsicsNVVM.td @@ -4479,6 +4479,22 @@ def int_nvvm_sust_p_3d_v4i32_trap "llvm.nvvm.sust.p.3d.v4i32.trap">, ClangBuiltin<"__nvvm_sust_p_3d_v4i32_trap">; + +def int_nvvm_rotate_b32 + : DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], + [IntrNoMem, IntrSpeculatable], "llvm.nvvm.rotate.b32">, + ClangBuiltin<"__nvvm_rotate_b32">; + +def int_nvvm_rotate_b64 + : DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i32_ty], + [IntrNoMem, IntrSpeculatable], "llvm.nvvm.rotate.b64">, + ClangBuiltin<"__nvvm_rotate_b64">; + +def int_nvvm_rotate_right_b64 + : DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i32_ty], + [IntrNoMem, IntrSpeculatable], "llvm.nvvm.rotate.right.b64">, + ClangBuiltin<"__nvvm_rotate_right_b64">; + def int_nvvm_swap_lo_hi_b64 : DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_i64_ty], [IntrNoMem, IntrSpeculatable], "llvm.nvvm.swap.lo.hi.b64">, diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp index 3390d651d6c69..02d1d9d9f7898 100644 --- a/llvm/lib/IR/AutoUpgrade.cpp +++ b/llvm/lib/IR/AutoUpgrade.cpp @@ -1272,9 +1272,6 @@ static bool upgradeIntrinsicFunction1(Function *F, Function *&NewFn, // nvvm.bitcast.{f2i,i2f,ll2d,d2ll} Expand = Name == "f2i" || Name == "i2f" || Name == "ll2d" || Name == "d2ll"; - else if (Name.consume_front("rotate.")) - // nvvm.rotate.{b32,b64,right.b64} - Expand = Name == "b32" || Name == "b64" || Name == "right.b64"; else Expand = false; @@ -2261,108 +2258,6 @@ void llvm::UpgradeInlineAsmString(std::string *AsmStr) { } } -static Value *upgradeNVVMIntrinsicCall(StringRef Name, CallBase *CI, - Function *F, IRBuilder<> &Builder) { - Value *Rep = nullptr; - - if (Name == "abs.i" || Name == "abs.ll") { - Value *Arg = CI->getArgOperand(0); - Value *Neg = Builder.CreateNeg(Arg, "neg"); - Value *Cmp = Builder.CreateICmpSGE( - Arg, llvm::Constant::getNullValue(Arg->getType()), "abs.cond"); - Rep = Builder.CreateSelect(Cmp, Arg, Neg, "abs"); - } else if (Name.starts_with("atomic.load.add.f32.p") || - Name.starts_with("atomic.load.add.f64.p")) { - Value *Ptr = CI->getArgOperand(0); - Value *Val = CI->getArgOperand(1); - Rep = Builder.CreateAtomicRMW(AtomicRMWInst::FAdd, Ptr, Val, MaybeAlign(), - AtomicOrdering::SequentiallyConsistent); - } else if (Name.consume_front("max.") && - (Name == "s" || Name == "i" || Name == "ll" || 
Name == "us" ||
-              Name == "ui" || Name == "ull")) {
-    Value *Arg0 = CI->getArgOperand(0);
-    Value *Arg1 = CI->getArgOperand(1);
-    Value *Cmp = Name.starts_with("u")
-                     ? Builder.CreateICmpUGE(Arg0, Arg1, "max.cond")
-                     : Builder.CreateICmpSGE(Arg0, Arg1, "max.cond");
-    Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "max");
-  } else if (Name.consume_front("min.") &&
-             (Name == "s" || Name == "i" || Name == "ll" || Name == "us" ||
-              Name == "ui" || Name == "ull")) {
-    Value *Arg0 = CI->getArgOperand(0);
-    Value *Arg1 = CI->getArgOperand(1);
-    Value *Cmp = Name.starts_with("u")
-                     ? Builder.CreateICmpULE(Arg0, Arg1, "min.cond")
-                     : Builder.CreateICmpSLE(Arg0, Arg1, "min.cond");
-    Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "min");
-  } else if (Name == "clz.ll") {
-    // llvm.nvvm.clz.ll returns an i32, but llvm.ctlz.i64 returns an i64.
-    Value *Arg = CI->getArgOperand(0);
-    Value *Ctlz = Builder.CreateCall(
-        Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz,
-                                  {Arg->getType()}),
-        {Arg, Builder.getFalse()}, "ctlz");
-    Rep = Builder.CreateTrunc(Ctlz, Builder.getInt32Ty(), "ctlz.trunc");
-  } else if (Name == "popc.ll") {
-    // llvm.nvvm.popc.ll returns an i32, but llvm.ctpop.i64 returns an
-    // i64.
-    Value *Arg = CI->getArgOperand(0);
-    Value *Popc = Builder.CreateCall(
-        Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctpop,
-                                  {Arg->getType()}),
-        Arg, "ctpop");
-    Rep = Builder.CreateTrunc(Popc, Builder.getInt32Ty(), "ctpop.trunc");
-  } else if (Name == "h2f") {
-    Rep = Builder.CreateCall(
-        Intrinsic::getDeclaration(F->getParent(), Intrinsic::convert_from_fp16,
-                                  {Builder.getFloatTy()}),
-        CI->getArgOperand(0), "h2f");
-  } else if (Name.consume_front("bitcast.") &&
-             (Name == "f2i" || Name == "i2f" || Name == "ll2d" ||
-              Name == "d2ll")) {
-    Rep = Builder.CreateBitCast(CI->getArgOperand(0), CI->getType());
-  } else if (Name == "rotate.b32") {
-    Value *Arg = CI->getOperand(0);
-    Value *ShiftAmt = CI->getOperand(1);
-    Rep = Builder.CreateIntrinsic(Builder.getInt32Ty(), Intrinsic::fshl,
-                                  {Arg, Arg, ShiftAmt});
-  } else if (Name == "rotate.b64") {
-    Type *Int64Ty = Builder.getInt64Ty();
-    Value *Arg = CI->getOperand(0);
-    Value *ZExtShiftAmt = Builder.CreateZExt(CI->getOperand(1), Int64Ty);
-    Rep = Builder.CreateIntrinsic(Int64Ty, Intrinsic::fshl,
-                                  {Arg, Arg, ZExtShiftAmt});
-  } else if (Name == "rotate.right.b64") {
-    Type *Int64Ty = Builder.getInt64Ty();
-    Value *Arg = CI->getOperand(0);
-    Value *ZExtShiftAmt = Builder.CreateZExt(CI->getOperand(1), Int64Ty);
-    Rep = Builder.CreateIntrinsic(Int64Ty, Intrinsic::fshr,
-                                  {Arg, Arg, ZExtShiftAmt});
-  } else {
-    Intrinsic::ID IID = shouldUpgradeNVPTXBF16Intrinsic(Name);
-    if (IID != Intrinsic::not_intrinsic &&
-        !F->getReturnType()->getScalarType()->isBFloatTy()) {
-      rename(F);
-      Function *NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
-      SmallVector<Value *, 4> Args;
-      for (size_t I = 0; I < NewFn->arg_size(); ++I) {
-        Value *Arg = CI->getArgOperand(I);
-        Type *OldType = Arg->getType();
-        Type *NewType = NewFn->getArg(I)->getType();
-        Args.push_back(
-            (OldType->isIntegerTy() && NewType->getScalarType()->isBFloatTy())
-                ? Builder.CreateBitCast(Arg, NewType)
-                : Arg);
-      }
-      Rep = Builder.CreateCall(NewFn, Args);
-      if (F->getReturnType()->isIntegerTy())
-        Rep = Builder.CreateBitCast(Rep, F->getReturnType());
-    }
-  }
-
-  return Rep;
-}
-
 static Value *upgradeX86IntrinsicCall(StringRef Name, CallBase *CI, Function *F,
                                       IRBuilder<> &Builder) {
   LLVMContext &C = F->getContext();
@@ -4313,8 +4208,85 @@ void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) {
 
   if (!IsX86 && Name == "stackprotectorcheck") {
     Rep = nullptr;
+  } else if (IsNVVM && (Name == "abs.i" || Name == "abs.ll")) {
+    Value *Arg = CI->getArgOperand(0);
+    Value *Neg = Builder.CreateNeg(Arg, "neg");
+    Value *Cmp = Builder.CreateICmpSGE(
+        Arg, llvm::Constant::getNullValue(Arg->getType()), "abs.cond");
+    Rep = Builder.CreateSelect(Cmp, Arg, Neg, "abs");
+  } else if (IsNVVM && (Name.starts_with("atomic.load.add.f32.p") ||
+                        Name.starts_with("atomic.load.add.f64.p"))) {
+    Value *Ptr = CI->getArgOperand(0);
+    Value *Val = CI->getArgOperand(1);
+    Rep = Builder.CreateAtomicRMW(AtomicRMWInst::FAdd, Ptr, Val, MaybeAlign(),
+                                  AtomicOrdering::SequentiallyConsistent);
+  } else if (IsNVVM && Name.consume_front("max.") &&
+             (Name == "s" || Name == "i" || Name == "ll" || Name == "us" ||
+              Name == "ui" || Name == "ull")) {
+    Value *Arg0 = CI->getArgOperand(0);
+    Value *Arg1 = CI->getArgOperand(1);
+    Value *Cmp = Name.starts_with("u")
+                     ? Builder.CreateICmpUGE(Arg0, Arg1, "max.cond")
+                     : Builder.CreateICmpSGE(Arg0, Arg1, "max.cond");
+    Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "max");
+  } else if (IsNVVM && Name.consume_front("min.") &&
+             (Name == "s" || Name == "i" || Name == "ll" || Name == "us" ||
+              Name == "ui" || Name == "ull")) {
+    Value *Arg0 = CI->getArgOperand(0);
+    Value *Arg1 = CI->getArgOperand(1);
+    Value *Cmp = Name.starts_with("u")
+                     ? Builder.CreateICmpULE(Arg0, Arg1, "min.cond")
+                     : Builder.CreateICmpSLE(Arg0, Arg1, "min.cond");
+    Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "min");
+  } else if (IsNVVM && Name == "clz.ll") {
+    // llvm.nvvm.clz.ll returns an i32, but llvm.ctlz.i64 returns an i64.
+    Value *Arg = CI->getArgOperand(0);
+    Value *Ctlz = Builder.CreateCall(
+        Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz,
+                                  {Arg->getType()}),
+        {Arg, Builder.getFalse()}, "ctlz");
+    Rep = Builder.CreateTrunc(Ctlz, Builder.getInt32Ty(), "ctlz.trunc");
+  } else if (IsNVVM && Name == "popc.ll") {
+    // llvm.nvvm.popc.ll returns an i32, but llvm.ctpop.i64 returns an
+    // i64.
+    Value *Arg = CI->getArgOperand(0);
+    Value *Popc = Builder.CreateCall(
+        Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctpop,
+                                  {Arg->getType()}),
+        Arg, "ctpop");
+    Rep = Builder.CreateTrunc(Popc, Builder.getInt32Ty(), "ctpop.trunc");
   } else if (IsNVVM) {
-    Rep = upgradeNVVMIntrinsicCall(Name, CI, F, Builder);
+    if (Name == "h2f") {
+      Rep =
+          Builder.CreateCall(Intrinsic::getDeclaration(
+                                 F->getParent(), Intrinsic::convert_from_fp16,
+                                 {Builder.getFloatTy()}),
+                             CI->getArgOperand(0), "h2f");
+    } else if (Name.consume_front("bitcast.") &&
+               (Name == "f2i" || Name == "i2f" || Name == "ll2d" ||
+                Name == "d2ll")) {
+      Rep = Builder.CreateBitCast(CI->getArgOperand(0), CI->getType());
+    } else {
+      Intrinsic::ID IID = shouldUpgradeNVPTXBF16Intrinsic(Name);
+      if (IID != Intrinsic::not_intrinsic &&
+          !F->getReturnType()->getScalarType()->isBFloatTy()) {
+        rename(F);
+        NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
+        SmallVector<Value *, 4> Args;
+        for (size_t I = 0; I < NewFn->arg_size(); ++I) {
+          Value *Arg = CI->getArgOperand(I);
+          Type *OldType = Arg->getType();
+          Type *NewType = NewFn->getArg(I)->getType();
+          Args.push_back((OldType->isIntegerTy() &&
+                          NewType->getScalarType()->isBFloatTy())
+                             ? Builder.CreateBitCast(Arg, NewType)
+                             : Arg);
+        }
+        Rep = Builder.CreateCall(NewFn, Args);
+        if (F->getReturnType()->isIntegerTy())
+          Rep = Builder.CreateBitCast(Rep, F->getReturnType());
+      }
+    }
   } else if (IsX86) {
     Rep = upgradeX86IntrinsicCall(Name, CI, F, Builder);
   } else if (IsARM) {
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
index 8812136733fb2..2688834221091 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -594,13 +594,20 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM,
   setOperationAction(ISD::BITREVERSE, MVT::i32, Legal);
   setOperationAction(ISD::BITREVERSE, MVT::i64, Legal);
 
-  setOperationAction({ISD::ROTL, ISD::ROTR},
-                     {MVT::i8, MVT::i16, MVT::v2i16, MVT::i32, MVT::i64},
-                     Expand);
-
-  if (STI.hasHWROT32())
-    setOperationAction({ISD::FSHL, ISD::FSHR}, MVT::i32, Legal);
-
+  // TODO: we may consider expanding ROTL/ROTR on older GPUs.  Currently on GPUs
+  // that don't have h/w rotation we lower them to multi-instruction assembly.
+  // See ROT*_sw in NVPTXIntrInfo.td
+  setOperationAction(ISD::ROTL, MVT::i64, Legal);
+  setOperationAction(ISD::ROTR, MVT::i64, Legal);
+  setOperationAction(ISD::ROTL, MVT::i32, Legal);
+  setOperationAction(ISD::ROTR, MVT::i32, Legal);
+
+  setOperationAction(ISD::ROTL, MVT::i16, Expand);
+  setOperationAction(ISD::ROTL, MVT::v2i16, Expand);
+  setOperationAction(ISD::ROTR, MVT::i16, Expand);
+  setOperationAction(ISD::ROTR, MVT::v2i16, Expand);
+  setOperationAction(ISD::ROTL, MVT::i8, Expand);
+  setOperationAction(ISD::ROTR, MVT::i8, Expand);
   setOperationAction(ISD::BSWAP, MVT::i16, Expand);
 
   setOperationAction(ISD::BR_JT, MVT::Other, Custom);
diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
index f6bbf4c2ffc02..510e4b8100311 100644
--- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
+++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
@@ -1665,6 +1665,167 @@ def BREV64 :
              "brev.b64 \t$dst, $a;",
              [(set Int64Regs:$dst, (bitreverse Int64Regs:$a))]>;
 
+//
+// Rotate: Use ptx shf instruction if available.
+//
+
+// 32 bit r2 = rotl r1, n
+//    =>
+//        r2 = shf.l r1, r1, n
+def ROTL32imm_hw :
+  NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src, i32imm:$amt),
+            "shf.l.wrap.b32 \t$dst, $src, $src, $amt;",
+            [(set Int32Regs:$dst, (rotl (i32 Int32Regs:$src), (i32 imm:$amt)))]>,
+           Requires<[hasHWROT32]>;
+
+def ROTL32reg_hw :
+  NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src, Int32Regs:$amt),
+            "shf.l.wrap.b32 \t$dst, $src, $src, $amt;",
+            [(set Int32Regs:$dst, (rotl (i32 Int32Regs:$src), (i32 Int32Regs:$amt)))]>,
+           Requires<[hasHWROT32]>;
+
+// 32 bit r2 = rotr r1, n
+//    =>
+//        r2 = shf.r r1, r1, n
+def ROTR32imm_hw :
+  NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src, i32imm:$amt),
+            "shf.r.wrap.b32 \t$dst, $src, $src, $amt;",
+            [(set Int32Regs:$dst, (rotr (i32 Int32Regs:$src), (i32 imm:$amt)))]>,
+           Requires<[hasHWROT32]>;
+
+def ROTR32reg_hw :
+  NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src, Int32Regs:$amt),
+            "shf.r.wrap.b32 \t$dst, $src, $src, $amt;",
+            [(set Int32Regs:$dst, (rotr (i32 Int32Regs:$src), (i32 Int32Regs:$amt)))]>,
+           Requires<[hasHWROT32]>;
+
+// 32-bit software rotate by immediate.  $amt2 should equal 32 - $amt1.
+def ROT32imm_sw :
+  NVPTXInst<(outs Int32Regs:$dst),
+            (ins Int32Regs:$src, i32imm:$amt1, i32imm:$amt2),
+            "{{\n\t"
+            ".reg .b32 %lhs;\n\t"
+            ".reg .b32 %rhs;\n\t"
+            "shl.b32 \t%lhs, $src, $amt1;\n\t"
+            "shr.b32 \t%rhs, $src, $amt2;\n\t"
+            "add.u32 \t$dst, %lhs, %rhs;\n\t"
+            "}}",
+            []>;
+
+def SUB_FRM_32 : SDNodeXForm<imm, [{
+  return CurDAG->getTargetConstant(32 - N->getZExtValue(), SDLoc(N), MVT::i32);
+}]>;
+
+def : Pat<(rotl (i32 Int32Regs:$src), (i32 imm:$amt)),
+          (ROT32imm_sw Int32Regs:$src, imm:$amt, (SUB_FRM_32 node:$amt))>,
+      Requires<[noHWROT32]>;
+def : Pat<(rotr (i32 Int32Regs:$src), (i32 imm:$amt)),
+          (ROT32imm_sw Int32Regs:$src, (SUB_FRM_32 node:$amt), imm:$amt)>,
+      Requires<[noHWROT32]>;
+
+// 32-bit software rotate left by register.
+def ROTL32reg_sw :
+  NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src, Int32Regs:$amt),
+            "{{\n\t"
+            ".reg .b32 %lhs;\n\t"
+            ".reg .b32 %rhs;\n\t"
+            ".reg .b32 %amt2;\n\t"
+            "shl.b32 \t%lhs, $src, $amt;\n\t"
+            "sub.s32 \t%amt2, 32, $amt;\n\t"
+            "shr.b32 \t%rhs, $src, %amt2;\n\t"
+            "add.u32 \t$dst, %lhs, %rhs;\n\t"
+            "}}",
+            [(set Int32Regs:$dst, (rotl (i32 Int32Regs:$src), (i32 Int32Regs:$amt)))]>,
+           Requires<[noHWROT32]>;
+
+// 32-bit software rotate right by register.
+def ROTR32reg_sw :
+  NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src, Int32Regs:$amt),
+            "{{\n\t"
+            ".reg .b32 %lhs;\n\t"
+            ".reg .b32 %rhs;\n\t"
+            ".reg .b32 %amt2;\n\t"
+            "shr.b32 \t%lhs, $src, $amt;\n\t"
+            "sub.s32 \t%amt2, 32, $amt;\n\t"
+            "shl.b32 \t%rhs, $src, %amt2;\n\t"
+            "add.u32 \t$dst, %lhs, %rhs;\n\t"
+            "}}",
+            [(set Int32Regs:$dst, (rotr (i32 Int32Regs:$src), (i32 Int32Regs:$amt)))]>,
+           Requires<[noHWROT32]>;
+
+// 64-bit software rotate by immediate.  $amt2 should equal 64 - $amt1.
+def ROT64imm_sw :
+  NVPTXInst<(outs Int64Regs:$dst),
+            (ins Int64Regs:$src, i32imm:$amt1, i32imm:$amt2),
+            "{{\n\t"
+            ".reg .b64 %lhs;\n\t"
+            ".reg .b64 %rhs;\n\t"
+            "shl.b64 \t%lhs, $src, $amt1;\n\t"
+            "shr.b64 \t%rhs, $src, $amt2;\n\t"
+            "add.u64 \t$dst, %lhs, %rhs;\n\t"
+            "}}",
+            []>;
+
+def SUB_FRM_64 : SDNodeXForm<imm, [{
+  return CurDAG->getTargetConstant(64-N->getZExtValue(), SDLoc(N), MVT::i32);
+}]>;
+
+def : Pat<(rotl Int64Regs:$src, (i32 imm:$amt)),
+          (ROT64imm_sw Int64Regs:$src, imm:$amt, (SUB_FRM_64 node:$amt))>;
+def : Pat<(rotr Int64Regs:$src, (i32 imm:$amt)),
+          (ROT64imm_sw Int64Regs:$src, (SUB_FRM_64 node:$amt), imm:$amt)>;
+
+// 64-bit software rotate left by register.
+def ROTL64reg_sw :
+  NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src, Int32Regs:$amt),
+            "{{\n\t"
+            ".reg .b64 %lhs;\n\t"
+            ".reg .b64 %rhs;\n\t"
+            ".reg .u32 %amt2;\n\t"
+            "and.b32 \t%amt2, $amt, 63;\n\t"
+            "shl.b64 \t%lhs, $src, %amt2;\n\t"
+            "sub.u32 \t%amt2, 64, %amt2;\n\t"
+            "shr.b64 \t%rhs, $src, %amt2;\n\t"
+            "add.u64 \t$dst, %lhs, %rhs;\n\t"
+            "}}",
+            [(set Int64Regs:$dst, (rotl Int64Regs:$src, (i32 Int32Regs:$amt)))]>;
+
+def ROTR64reg_sw :
+  NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src, Int32Regs:$amt),
+            "{{\n\t"
+            ".reg .b64 %lhs;\n\t"
+            ".reg .b64 %rhs;\n\t"
+            ".reg .u32 %amt2;\n\t"
+            "and.b32 \t%amt2, $amt, 63;\n\t"
+            "shr.b64 \t%lhs, $src, %amt2;\n\t"
+            "sub.u32 \t%amt2, 64, %amt2;\n\t"
+            "shl.b64 \t%rhs, $src, %amt2;\n\t"
+            "add.u64 \t$dst, %lhs, %rhs;\n\t"
+            "}}",
+            [(set Int64Regs:$dst, (rotr Int64Regs:$src, (i32 Int32Regs:$amt)))]>;
+
+//
+// Funnnel shift in clamp mode
+//
+
+// Create SDNodes so they can be used in the DAG code, e.g.
+// NVPTXISelLowering (LowerShiftLeftParts and LowerShiftRightParts)
+def FUN_SHFL_CLAMP : SDNode<"NVPTXISD::FUN_SHFL_CLAMP", SDTIntShiftDOp, []>;
+def FUN_SHFR_CLAMP : SDNode<"NVPTXISD::FUN_SHFR_CLAMP", SDTIntShiftDOp, []>;
+
+def FUNSHFLCLAMP :
+  NVPTXInst<(outs Int32Regs:$dst),
+            (ins Int32Regs:$lo, Int32Regs:$hi, Int32Regs:$amt),
+            "shf.l.clamp.b32 \t$dst, $lo, $hi, $amt;",
+            [(set Int32Regs:$dst,
+              (FUN_SHFL_CLAMP (i32 Int32Regs:$lo), (i32 Int32Regs:$hi), (i32 Int32Regs:$amt)))]>;
+
+def FUNSHFRCLAMP :
+  NVPTXInst<(outs Int32Regs:$dst),
+            (ins Int32Regs:$lo, Int32Regs:$hi, Int32Regs:$amt),
+            "shf.r.clamp.b32 \t$dst, $lo, $hi, $amt;",
+            [(set Int32Regs:$dst,
+              (FUN_SHFR_CLAMP (i32 Int32Regs:$lo), (i32 Int32Regs:$hi), (i32 Int32Regs:$amt)))]>;
 
 //
 // BFE - bit-field extract
@@ -3496,42 +3657,6 @@ def : Pat<(v2i16 (build_vector (i16 Int16Regs:$a), (i16 Int16Regs:$b))),
 def: Pat<(v2i16 (scalar_to_vector (i16 Int16Regs:$a))),
          (CVT_u32_u16 Int16Regs:$a, CvtNONE)>;
 
-//
-// Funnel-Shift
-//
-
-// Create SDNodes so they can be used in the DAG code, e.g.
-// NVPTXISelLowering (LowerShiftLeftParts and LowerShiftRightParts)
-def fshl_clamp : SDNode<"NVPTXISD::FUN_SHFL_CLAMP", SDTIntShiftDOp, []>;
-def fshr_clamp : SDNode<"NVPTXISD::FUN_SHFR_CLAMP", SDTIntShiftDOp, []>;
-
-// Funnel shift, requires >= sm_32. Does not trap if amt is out of range, so
-// no side effects.
-let hasSideEffects = false in {
-  multiclass ShfInst<string mode, SDNode op> {
-    def _i
-        : NVPTXInst<(outs Int32Regs:$dst),
-                    (ins Int32Regs:$lo, Int32Regs:$hi, i32imm:$amt),
-                    "shf." 
# mode # ".b32 \t$dst, $lo, $hi, $amt;", - [(set Int32Regs:$dst, - (op (i32 Int32Regs:$lo), (i32 Int32Regs:$hi), (i32 Int32Regs:$amt)))]>, - Requires<[hasHWROT32]>; - } - - defm SHF_L_CLAMP : ShfInst<"l.clamp", fshl_clamp>; - defm SHF_R_CLAMP : ShfInst<"r.clamp", fshr_clamp>; - defm SHF_L_WRAP : ShfInst<"l.wrap", fshl>; - defm SHF_R_WRAP : ShfInst<"r.wrap", fshr>; -} - // Count leading zeros let hasSideEffects = false in { def CLZr32 : NVPTXInst<(outs Int32Regs:$d), (ins Int32Regs:$a), diff --git a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td index 2688cfbe5e824..56c551661151d 100644 --- a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td +++ b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td @@ -2733,9 +2733,134 @@ def : Pat<(int_nvvm_read_ptx_sreg_envreg30), (MOV_SPECIAL ENVREG30)>; def : Pat<(int_nvvm_read_ptx_sreg_envreg31), (MOV_SPECIAL ENVREG31)>; +// rotate builtin support + +def ROTATE_B32_HW_IMM + : NVPTXInst<(outs Int32Regs:$dst), + (ins Int32Regs:$src, i32imm:$amt), + "shf.l.wrap.b32 \t$dst, $src, $src, $amt;", + [(set Int32Regs:$dst, + (int_nvvm_rotate_b32 Int32Regs:$src, (i32 imm:$amt)))]>, + Requires<[hasHWROT32]> ; + +def ROTATE_B32_HW_REG + : NVPTXInst<(outs Int32Regs:$dst), + (ins Int32Regs:$src, Int32Regs:$amt), + "shf.l.wrap.b32 \t$dst, $src, $src, $amt;", + [(set Int32Regs:$dst, + (int_nvvm_rotate_b32 Int32Regs:$src, Int32Regs:$amt))]>, + Requires<[hasHWROT32]> ; + +def : Pat<(int_nvvm_rotate_b32 Int32Regs:$src, (i32 imm:$amt)), + (ROT32imm_sw Int32Regs:$src, imm:$amt, (SUB_FRM_32 node:$amt))>, + Requires<[noHWROT32]> ; + +def : Pat<(int_nvvm_rotate_b32 Int32Regs:$src, Int32Regs:$amt), + (ROTL32reg_sw Int32Regs:$src, Int32Regs:$amt)>, + Requires<[noHWROT32]> ; + +let hasSideEffects = false in { + def GET_LO_INT64 : NVPTXInst<(outs Int32Regs:$dst), (ins Int64Regs:$src), + !strconcat("{{\n\t", + ".reg .b32 %dummy;\n\t", + "mov.b64 \t{$dst,%dummy}, $src;\n\t", + "}}"), + []> ; + + def GET_HI_INT64 : NVPTXInst<(outs Int32Regs:$dst), (ins Int64Regs:$src), + !strconcat("{{\n\t", + ".reg .b32 %dummy;\n\t", + "mov.b64 \t{%dummy,$dst}, $src;\n\t", + "}}"), + []> ; +} + +let hasSideEffects = false in { + def PACK_TWO_INT32 + : NVPTXInst<(outs Int64Regs:$dst), (ins Int32Regs:$lo, Int32Regs:$hi), + "mov.b64 \t$dst, {{$lo, $hi}};", []> ; +} + def : Pat<(int_nvvm_swap_lo_hi_b64 Int64Regs:$src), - (V2I32toI64 (I64toI32H Int64Regs:$src), - (I64toI32L Int64Regs:$src))> ; + (PACK_TWO_INT32 (GET_HI_INT64 Int64Regs:$src), + (GET_LO_INT64 Int64Regs:$src))> ; + +// Funnel shift, requires >= sm_32. Does not trap if amt is out of range, so +// no side effects. 
+let hasSideEffects = false in { + def SHF_L_WRAP_B32_IMM + : NVPTXInst<(outs Int32Regs:$dst), + (ins Int32Regs:$lo, Int32Regs:$hi, i32imm:$amt), + "shf.l.wrap.b32 \t$dst, $lo, $hi, $amt;",[]>, + Requires<[hasHWROT32]>; + + def SHF_L_WRAP_B32_REG + : NVPTXInst<(outs Int32Regs:$dst), + (ins Int32Regs:$lo, Int32Regs:$hi, Int32Regs:$amt), + "shf.l.wrap.b32 \t$dst, $lo, $hi, $amt;",[]>, + Requires<[hasHWROT32]>; + + def SHF_R_WRAP_B32_IMM + : NVPTXInst<(outs Int32Regs:$dst), + (ins Int32Regs:$lo, Int32Regs:$hi, i32imm:$amt), + "shf.r.wrap.b32 \t$dst, $lo, $hi, $amt;",[]>, + Requires<[hasHWROT32]>; + + def SHF_R_WRAP_B32_REG + : NVPTXInst<(outs Int32Regs:$dst), + (ins Int32Regs:$lo, Int32Regs:$hi, Int32Regs:$amt), + "shf.r.wrap.b32 \t$dst, $lo, $hi, $amt;",[]>, + Requires<[hasHWROT32]>; +} + +// HW version of rotate 64 +def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, (i32 imm:$amt)), + (PACK_TWO_INT32 + (SHF_L_WRAP_B32_IMM (GET_HI_INT64 Int64Regs:$src), + (GET_LO_INT64 Int64Regs:$src), imm:$amt), + (SHF_L_WRAP_B32_IMM (GET_LO_INT64 Int64Regs:$src), + (GET_HI_INT64 Int64Regs:$src), imm:$amt))>, + Requires<[hasHWROT32]>; + +def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, Int32Regs:$amt), + (PACK_TWO_INT32 + (SHF_L_WRAP_B32_REG (GET_HI_INT64 Int64Regs:$src), + (GET_LO_INT64 Int64Regs:$src), Int32Regs:$amt), + (SHF_L_WRAP_B32_REG (GET_LO_INT64 Int64Regs:$src), + (GET_HI_INT64 Int64Regs:$src), Int32Regs:$amt))>, + Requires<[hasHWROT32]>; + + +def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, (i32 imm:$amt)), + (PACK_TWO_INT32 + (SHF_R_WRAP_B32_IMM (GET_LO_INT64 Int64Regs:$src), + (GET_HI_INT64 Int64Regs:$src), imm:$amt), + (SHF_R_WRAP_B32_IMM (GET_HI_INT64 Int64Regs:$src), + (GET_LO_INT64 Int64Regs:$src), imm:$amt))>, + Requires<[hasHWROT32]>; + +def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, Int32Regs:$amt), + (PACK_TWO_INT32 + (SHF_R_WRAP_B32_REG (GET_LO_INT64 Int64Regs:$src), + (GET_HI_INT64 Int64Regs:$src), Int32Regs:$amt), + (SHF_R_WRAP_B32_REG (GET_HI_INT64 Int64Regs:$src), + (GET_LO_INT64 Int64Regs:$src), Int32Regs:$amt))>, + Requires<[hasHWROT32]>; + +// SW version of rotate 64 +def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, (i32 imm:$amt)), + (ROT64imm_sw Int64Regs:$src, imm:$amt, (SUB_FRM_64 node:$amt))>, + Requires<[noHWROT32]>; +def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, Int32Regs:$amt), + (ROTL64reg_sw Int64Regs:$src, Int32Regs:$amt)>, + Requires<[noHWROT32]>; +def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, (i32 imm:$amt)), + (ROT64imm_sw Int64Regs:$src, (SUB_FRM_64 node:$amt), imm:$amt)>, + Requires<[noHWROT32]>; +def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, Int32Regs:$amt), + (ROTR64reg_sw Int64Regs:$src, Int32Regs:$amt)>, + Requires<[noHWROT32]>; + //----------------------------------- // Texture Intrinsics diff --git a/llvm/test/Assembler/auto_upgrade_nvvm_intrinsics.ll b/llvm/test/Assembler/auto_upgrade_nvvm_intrinsics.ll index 43ac246055da7..7e4a4d527fc90 100644 --- a/llvm/test/Assembler/auto_upgrade_nvvm_intrinsics.ll +++ b/llvm/test/Assembler/auto_upgrade_nvvm_intrinsics.ll @@ -31,10 +31,6 @@ declare float @llvm.nvvm.bitcast.i2f(i32) declare i64 @llvm.nvvm.bitcast.d2ll(double) declare double @llvm.nvvm.bitcast.ll2d(i64) -declare i32 @llvm.nvvm.rotate.b32(i32, i32) -declare i64 @llvm.nvvm.rotate.right.b64(i64, i32) -declare i64 @llvm.nvvm.rotate.b64(i64, i32) - ; CHECK-LABEL: @simple_upgrade define void @simple_upgrade(i32 %a, i64 %b, i16 %c) { ; CHECK: call i32 @llvm.bitreverse.i32(i32 %a) @@ -143,16 +139,4 @@ define void @bitcast(i32 %a, i64 %b, float 
%c, double %d) { %r4 = call double @llvm.nvvm.bitcast.ll2d(i64 %b) ret void -} - -; CHECK-LABEL: @rotate -define void @rotate(i32 %a, i64 %b) { -; CHECK: call i32 @llvm.fshl.i32(i32 %a, i32 %a, i32 6) -; CHECK: call i64 @llvm.fshr.i64(i64 %b, i64 %b, i64 7) -; CHECK: call i64 @llvm.fshl.i64(i64 %b, i64 %b, i64 8) -; - %r1 = call i32 @llvm.nvvm.rotate.b32(i32 %a, i32 6) - %r2 = call i64 @llvm.nvvm.rotate.right.b64(i64 %b, i32 7) - %r3 = call i64 @llvm.nvvm.rotate.b64(i64 %b, i32 8) - ret void -} +} \ No newline at end of file diff --git a/llvm/test/CodeGen/NVPTX/rotate.ll b/llvm/test/CodeGen/NVPTX/rotate.ll index 9ec5bcd13403b..20c7ae5908d29 100644 --- a/llvm/test/CodeGen/NVPTX/rotate.ll +++ b/llvm/test/CodeGen/NVPTX/rotate.ll @@ -9,29 +9,26 @@ declare i32 @llvm.nvvm.rotate.b32(i32, i32) declare i64 @llvm.nvvm.rotate.b64(i64, i32) declare i64 @llvm.nvvm.rotate.right.b64(i64, i32) -declare i64 @llvm.fshl.i64(i64, i64, i64) -declare i64 @llvm.fshr.i64(i64, i64, i64) -declare i32 @llvm.fshl.i32(i32, i32, i32) -declare i32 @llvm.fshr.i32(i32, i32, i32) - - ; SM20: rotate32 ; SM35: rotate32 define i32 @rotate32(i32 %a, i32 %b) { ; SM20-LABEL: rotate32( ; SM20: { -; SM20-NEXT: .reg .b32 %r<9>; +; SM20-NEXT: .reg .b32 %r<4>; ; SM20-EMPTY: ; SM20-NEXT: // %bb.0: ; SM20-NEXT: ld.param.u32 %r1, [rotate32_param_0]; ; SM20-NEXT: ld.param.u32 %r2, [rotate32_param_1]; -; SM20-NEXT: and.b32 %r3, %r2, 31; -; SM20-NEXT: shl.b32 %r4, %r1, %r3; -; SM20-NEXT: neg.s32 %r5, %r2; -; SM20-NEXT: and.b32 %r6, %r5, 31; -; SM20-NEXT: shr.u32 %r7, %r1, %r6; -; SM20-NEXT: or.b32 %r8, %r4, %r7; -; SM20-NEXT: st.param.b32 [func_retval0+0], %r8; +; SM20-NEXT: { +; SM20-NEXT: .reg .b32 %lhs; +; SM20-NEXT: .reg .b32 %rhs; +; SM20-NEXT: .reg .b32 %amt2; +; SM20-NEXT: shl.b32 %lhs, %r1, %r2; +; SM20-NEXT: sub.s32 %amt2, 32, %r2; +; SM20-NEXT: shr.b32 %rhs, %r1, %amt2; +; SM20-NEXT: add.u32 %r3, %lhs, %rhs; +; SM20-NEXT: } +; SM20-NEXT: st.param.b32 [func_retval0+0], %r3; ; SM20-NEXT: ret; ; ; SM35-LABEL: rotate32( @@ -53,36 +50,45 @@ define i32 @rotate32(i32 %a, i32 %b) { define i64 @rotate64(i64 %a, i32 %b) { ; SM20-LABEL: rotate64( ; SM20: { -; SM20-NEXT: .reg .b32 %r<5>; -; SM20-NEXT: .reg .b64 %rd<5>; +; SM20-NEXT: .reg .b32 %r<2>; +; SM20-NEXT: .reg .b64 %rd<3>; ; SM20-EMPTY: ; SM20-NEXT: // %bb.0: ; SM20-NEXT: ld.param.u64 %rd1, [rotate64_param_0]; ; SM20-NEXT: ld.param.u32 %r1, [rotate64_param_1]; -; SM20-NEXT: and.b32 %r2, %r1, 63; -; SM20-NEXT: shl.b64 %rd2, %rd1, %r2; -; SM20-NEXT: neg.s32 %r3, %r1; -; SM20-NEXT: and.b32 %r4, %r3, 63; -; SM20-NEXT: shr.u64 %rd3, %rd1, %r4; -; SM20-NEXT: or.b64 %rd4, %rd2, %rd3; -; SM20-NEXT: st.param.b64 [func_retval0+0], %rd4; +; SM20-NEXT: { +; SM20-NEXT: .reg .b64 %lhs; +; SM20-NEXT: .reg .b64 %rhs; +; SM20-NEXT: .reg .u32 %amt2; +; SM20-NEXT: and.b32 %amt2, %r1, 63; +; SM20-NEXT: shl.b64 %lhs, %rd1, %amt2; +; SM20-NEXT: sub.u32 %amt2, 64, %amt2; +; SM20-NEXT: shr.b64 %rhs, %rd1, %amt2; +; SM20-NEXT: add.u64 %rd2, %lhs, %rhs; +; SM20-NEXT: } +; SM20-NEXT: st.param.b64 [func_retval0+0], %rd2; ; SM20-NEXT: ret; ; ; SM35-LABEL: rotate64( ; SM35: { -; SM35-NEXT: .reg .b32 %r<5>; -; SM35-NEXT: .reg .b64 %rd<5>; +; SM35-NEXT: .reg .b32 %r<6>; +; SM35-NEXT: .reg .b64 %rd<3>; ; SM35-EMPTY: ; SM35-NEXT: // %bb.0: ; SM35-NEXT: ld.param.u64 %rd1, [rotate64_param_0]; -; SM35-NEXT: ld.param.u32 %r1, [rotate64_param_1]; -; SM35-NEXT: and.b32 %r2, %r1, 63; -; SM35-NEXT: shl.b64 %rd2, %rd1, %r2; -; SM35-NEXT: neg.s32 %r3, %r1; -; SM35-NEXT: and.b32 %r4, %r3, 63; -; SM35-NEXT: shr.u64 %rd3, %rd1, 
%r4; -; SM35-NEXT: or.b64 %rd4, %rd2, %rd3; -; SM35-NEXT: st.param.b64 [func_retval0+0], %rd4; +; SM35-NEXT: { +; SM35-NEXT: .reg .b32 %dummy; +; SM35-NEXT: mov.b64 {%dummy,%r1}, %rd1; +; SM35-NEXT: } +; SM35-NEXT: { +; SM35-NEXT: .reg .b32 %dummy; +; SM35-NEXT: mov.b64 {%r2,%dummy}, %rd1; +; SM35-NEXT: } +; SM35-NEXT: ld.param.u32 %r3, [rotate64_param_1]; +; SM35-NEXT: shf.l.wrap.b32 %r4, %r2, %r1, %r3; +; SM35-NEXT: shf.l.wrap.b32 %r5, %r1, %r2, %r3; +; SM35-NEXT: mov.b64 %rd2, {%r5, %r4}; +; SM35-NEXT: st.param.b64 [func_retval0+0], %rd2; ; SM35-NEXT: ret; %val = tail call i64 @llvm.nvvm.rotate.b64(i64 %a, i32 %b) ret i64 %val @@ -93,36 +99,45 @@ define i64 @rotate64(i64 %a, i32 %b) { define i64 @rotateright64(i64 %a, i32 %b) { ; SM20-LABEL: rotateright64( ; SM20: { -; SM20-NEXT: .reg .b32 %r<5>; -; SM20-NEXT: .reg .b64 %rd<5>; +; SM20-NEXT: .reg .b32 %r<2>; +; SM20-NEXT: .reg .b64 %rd<3>; ; SM20-EMPTY: ; SM20-NEXT: // %bb.0: ; SM20-NEXT: ld.param.u64 %rd1, [rotateright64_param_0]; ; SM20-NEXT: ld.param.u32 %r1, [rotateright64_param_1]; -; SM20-NEXT: and.b32 %r2, %r1, 63; -; SM20-NEXT: shr.u64 %rd2, %rd1, %r2; -; SM20-NEXT: neg.s32 %r3, %r1; -; SM20-NEXT: and.b32 %r4, %r3, 63; -; SM20-NEXT: shl.b64 %rd3, %rd1, %r4; -; SM20-NEXT: or.b64 %rd4, %rd2, %rd3; -; SM20-NEXT: st.param.b64 [func_retval0+0], %rd4; +; SM20-NEXT: { +; SM20-NEXT: .reg .b64 %lhs; +; SM20-NEXT: .reg .b64 %rhs; +; SM20-NEXT: .reg .u32 %amt2; +; SM20-NEXT: and.b32 %amt2, %r1, 63; +; SM20-NEXT: shr.b64 %lhs, %rd1, %amt2; +; SM20-NEXT: sub.u32 %amt2, 64, %amt2; +; SM20-NEXT: shl.b64 %rhs, %rd1, %amt2; +; SM20-NEXT: add.u64 %rd2, %lhs, %rhs; +; SM20-NEXT: } +; SM20-NEXT: st.param.b64 [func_retval0+0], %rd2; ; SM20-NEXT: ret; ; ; SM35-LABEL: rotateright64( ; SM35: { -; SM35-NEXT: .reg .b32 %r<5>; -; SM35-NEXT: .reg .b64 %rd<5>; +; SM35-NEXT: .reg .b32 %r<6>; +; SM35-NEXT: .reg .b64 %rd<3>; ; SM35-EMPTY: ; SM35-NEXT: // %bb.0: ; SM35-NEXT: ld.param.u64 %rd1, [rotateright64_param_0]; -; SM35-NEXT: ld.param.u32 %r1, [rotateright64_param_1]; -; SM35-NEXT: and.b32 %r2, %r1, 63; -; SM35-NEXT: shr.u64 %rd2, %rd1, %r2; -; SM35-NEXT: neg.s32 %r3, %r1; -; SM35-NEXT: and.b32 %r4, %r3, 63; -; SM35-NEXT: shl.b64 %rd3, %rd1, %r4; -; SM35-NEXT: or.b64 %rd4, %rd2, %rd3; -; SM35-NEXT: st.param.b64 [func_retval0+0], %rd4; +; SM35-NEXT: { +; SM35-NEXT: .reg .b32 %dummy; +; SM35-NEXT: mov.b64 {%r1,%dummy}, %rd1; +; SM35-NEXT: } +; SM35-NEXT: { +; SM35-NEXT: .reg .b32 %dummy; +; SM35-NEXT: mov.b64 {%dummy,%r2}, %rd1; +; SM35-NEXT: } +; SM35-NEXT: ld.param.u32 %r3, [rotateright64_param_1]; +; SM35-NEXT: shf.r.wrap.b32 %r4, %r2, %r1, %r3; +; SM35-NEXT: shf.r.wrap.b32 %r5, %r1, %r2, %r3; +; SM35-NEXT: mov.b64 %rd2, {%r5, %r4}; +; SM35-NEXT: st.param.b64 [func_retval0+0], %rd2; ; SM35-NEXT: ret; %val = tail call i64 @llvm.nvvm.rotate.right.b64(i64 %a, i32 %b) ret i64 %val @@ -133,14 +148,18 @@ define i64 @rotateright64(i64 %a, i32 %b) { define i32 @rotl0(i32 %x) { ; SM20-LABEL: rotl0( ; SM20: { -; SM20-NEXT: .reg .b32 %r<5>; +; SM20-NEXT: .reg .b32 %r<3>; ; SM20-EMPTY: ; SM20-NEXT: // %bb.0: ; SM20-NEXT: ld.param.u32 %r1, [rotl0_param_0]; -; SM20-NEXT: shr.u32 %r2, %r1, 24; -; SM20-NEXT: shl.b32 %r3, %r1, 8; -; SM20-NEXT: or.b32 %r4, %r3, %r2; -; SM20-NEXT: st.param.b32 [func_retval0+0], %r4; +; SM20-NEXT: { +; SM20-NEXT: .reg .b32 %lhs; +; SM20-NEXT: .reg .b32 %rhs; +; SM20-NEXT: shl.b32 %lhs, %r1, 8; +; SM20-NEXT: shr.b32 %rhs, %r1, 24; +; SM20-NEXT: add.u32 %r2, %lhs, %rhs; +; SM20-NEXT: } +; SM20-NEXT: st.param.b32 [func_retval0+0], %r2; ; 
SM20-NEXT: ret; ; ; SM35-LABEL: rotl0( @@ -158,40 +177,51 @@ define i32 @rotl0(i32 %x) { ret i32 %t2 } +declare i64 @llvm.fshl.i64(i64, i64, i64) +declare i64 @llvm.fshr.i64(i64, i64, i64) + ; SM35: rotl64 define i64 @rotl64(i64 %a, i64 %n) { ; SM20-LABEL: rotl64( ; SM20: { -; SM20-NEXT: .reg .b32 %r<5>; -; SM20-NEXT: .reg .b64 %rd<5>; +; SM20-NEXT: .reg .b32 %r<2>; +; SM20-NEXT: .reg .b64 %rd<3>; ; SM20-EMPTY: ; SM20-NEXT: // %bb.0: ; SM20-NEXT: ld.param.u64 %rd1, [rotl64_param_0]; ; SM20-NEXT: ld.param.u32 %r1, [rotl64_param_1]; -; SM20-NEXT: and.b32 %r2, %r1, 63; -; SM20-NEXT: shl.b64 %rd2, %rd1, %r2; -; SM20-NEXT: neg.s32 %r3, %r1; -; SM20-NEXT: and.b32 %r4, %r3, 63; -; SM20-NEXT: shr.u64 %rd3, %rd1, %r4; -; SM20-NEXT: or.b64 %rd4, %rd2, %rd3; -; SM20-NEXT: st.param.b64 [func_retval0+0], %rd4; +; SM20-NEXT: { +; SM20-NEXT: .reg .b64 %lhs; +; SM20-NEXT: .reg .b64 %rhs; +; SM20-NEXT: .reg .u32 %amt2; +; SM20-NEXT: and.b32 %amt2, %r1, 63; +; SM20-NEXT: shl.b64 %lhs, %rd1, %amt2; +; SM20-NEXT: sub.u32 %amt2, 64, %amt2; +; SM20-NEXT: shr.b64 %rhs, %rd1, %amt2; +; SM20-NEXT: add.u64 %rd2, %lhs, %rhs; +; SM20-NEXT: } +; SM20-NEXT: st.param.b64 [func_retval0+0], %rd2; ; SM20-NEXT: ret; ; ; SM35-LABEL: rotl64( ; SM35: { -; SM35-NEXT: .reg .b32 %r<5>; -; SM35-NEXT: .reg .b64 %rd<5>; +; SM35-NEXT: .reg .b32 %r<2>; +; SM35-NEXT: .reg .b64 %rd<3>; ; SM35-EMPTY: ; SM35-NEXT: // %bb.0: ; SM35-NEXT: ld.param.u64 %rd1, [rotl64_param_0]; ; SM35-NEXT: ld.param.u32 %r1, [rotl64_param_1]; -; SM35-NEXT: and.b32 %r2, %r1, 63; -; SM35-NEXT: shl.b64 %rd2, %rd1, %r2; -; SM35-NEXT: neg.s32 %r3, %r1; -; SM35-NEXT: and.b32 %r4, %r3, 63; -; SM35-NEXT: shr.u64 %rd3, %rd1, %r4; -; SM35-NEXT: or.b64 %rd4, %rd2, %rd3; -; SM35-NEXT: st.param.b64 [func_retval0+0], %rd4; +; SM35-NEXT: { +; SM35-NEXT: .reg .b64 %lhs; +; SM35-NEXT: .reg .b64 %rhs; +; SM35-NEXT: .reg .u32 %amt2; +; SM35-NEXT: and.b32 %amt2, %r1, 63; +; SM35-NEXT: shl.b64 %lhs, %rd1, %amt2; +; SM35-NEXT: sub.u32 %amt2, 64, %amt2; +; SM35-NEXT: shr.b64 %rhs, %rd1, %amt2; +; SM35-NEXT: add.u64 %rd2, %lhs, %rhs; +; SM35-NEXT: } +; SM35-NEXT: st.param.b64 [func_retval0+0], %rd2; ; SM35-NEXT: ret; %val = tail call i64 @llvm.fshl.i64(i64 %a, i64 %a, i64 %n) ret i64 %val @@ -201,26 +231,34 @@ define i64 @rotl64(i64 %a, i64 %n) { define i64 @rotl64_imm(i64 %a) { ; SM20-LABEL: rotl64_imm( ; SM20: { -; SM20-NEXT: .reg .b64 %rd<5>; +; SM20-NEXT: .reg .b64 %rd<3>; ; SM20-EMPTY: ; SM20-NEXT: // %bb.0: ; SM20-NEXT: ld.param.u64 %rd1, [rotl64_imm_param_0]; -; SM20-NEXT: shr.u64 %rd2, %rd1, 62; -; SM20-NEXT: shl.b64 %rd3, %rd1, 2; -; SM20-NEXT: or.b64 %rd4, %rd3, %rd2; -; SM20-NEXT: st.param.b64 [func_retval0+0], %rd4; +; SM20-NEXT: { +; SM20-NEXT: .reg .b64 %lhs; +; SM20-NEXT: .reg .b64 %rhs; +; SM20-NEXT: shl.b64 %lhs, %rd1, 2; +; SM20-NEXT: shr.b64 %rhs, %rd1, 62; +; SM20-NEXT: add.u64 %rd2, %lhs, %rhs; +; SM20-NEXT: } +; SM20-NEXT: st.param.b64 [func_retval0+0], %rd2; ; SM20-NEXT: ret; ; ; SM35-LABEL: rotl64_imm( ; SM35: { -; SM35-NEXT: .reg .b64 %rd<5>; +; SM35-NEXT: .reg .b64 %rd<3>; ; SM35-EMPTY: ; SM35-NEXT: // %bb.0: ; SM35-NEXT: ld.param.u64 %rd1, [rotl64_imm_param_0]; -; SM35-NEXT: shr.u64 %rd2, %rd1, 62; -; SM35-NEXT: shl.b64 %rd3, %rd1, 2; -; SM35-NEXT: or.b64 %rd4, %rd3, %rd2; -; SM35-NEXT: st.param.b64 [func_retval0+0], %rd4; +; SM35-NEXT: { +; SM35-NEXT: .reg .b64 %lhs; +; SM35-NEXT: .reg .b64 %rhs; +; SM35-NEXT: shl.b64 %lhs, %rd1, 2; +; SM35-NEXT: shr.b64 %rhs, %rd1, 62; +; SM35-NEXT: add.u64 %rd2, %lhs, %rhs; +; SM35-NEXT: } +; SM35-NEXT: st.param.b64 
[func_retval0+0], %rd2; ; SM35-NEXT: ret; %val = tail call i64 @llvm.fshl.i64(i64 %a, i64 %a, i64 66) ret i64 %val @@ -230,36 +268,44 @@ define i64 @rotl64_imm(i64 %a) { define i64 @rotr64(i64 %a, i64 %n) { ; SM20-LABEL: rotr64( ; SM20: { -; SM20-NEXT: .reg .b32 %r<5>; -; SM20-NEXT: .reg .b64 %rd<5>; +; SM20-NEXT: .reg .b32 %r<2>; +; SM20-NEXT: .reg .b64 %rd<3>; ; SM20-EMPTY: ; SM20-NEXT: // %bb.0: ; SM20-NEXT: ld.param.u64 %rd1, [rotr64_param_0]; ; SM20-NEXT: ld.param.u32 %r1, [rotr64_param_1]; -; SM20-NEXT: and.b32 %r2, %r1, 63; -; SM20-NEXT: shr.u64 %rd2, %rd1, %r2; -; SM20-NEXT: neg.s32 %r3, %r1; -; SM20-NEXT: and.b32 %r4, %r3, 63; -; SM20-NEXT: shl.b64 %rd3, %rd1, %r4; -; SM20-NEXT: or.b64 %rd4, %rd2, %rd3; -; SM20-NEXT: st.param.b64 [func_retval0+0], %rd4; +; SM20-NEXT: { +; SM20-NEXT: .reg .b64 %lhs; +; SM20-NEXT: .reg .b64 %rhs; +; SM20-NEXT: .reg .u32 %amt2; +; SM20-NEXT: and.b32 %amt2, %r1, 63; +; SM20-NEXT: shr.b64 %lhs, %rd1, %amt2; +; SM20-NEXT: sub.u32 %amt2, 64, %amt2; +; SM20-NEXT: shl.b64 %rhs, %rd1, %amt2; +; SM20-NEXT: add.u64 %rd2, %lhs, %rhs; +; SM20-NEXT: } +; SM20-NEXT: st.param.b64 [func_retval0+0], %rd2; ; SM20-NEXT: ret; ; ; SM35-LABEL: rotr64( ; SM35: { -; SM35-NEXT: .reg .b32 %r<5>; -; SM35-NEXT: .reg .b64 %rd<5>; +; SM35-NEXT: .reg .b32 %r<2>; +; SM35-NEXT: .reg .b64 %rd<3>; ; SM35-EMPTY: ; SM35-NEXT: // %bb.0: ; SM35-NEXT: ld.param.u64 %rd1, [rotr64_param_0]; ; SM35-NEXT: ld.param.u32 %r1, [rotr64_param_1]; -; SM35-NEXT: and.b32 %r2, %r1, 63; -; SM35-NEXT: shr.u64 %rd2, %rd1, %r2; -; SM35-NEXT: neg.s32 %r3, %r1; -; SM35-NEXT: and.b32 %r4, %r3, 63; -; SM35-NEXT: shl.b64 %rd3, %rd1, %r4; -; SM35-NEXT: or.b64 %rd4, %rd2, %rd3; -; SM35-NEXT: st.param.b64 [func_retval0+0], %rd4; +; SM35-NEXT: { +; SM35-NEXT: .reg .b64 %lhs; +; SM35-NEXT: .reg .b64 %rhs; +; SM35-NEXT: .reg .u32 %amt2; +; SM35-NEXT: and.b32 %amt2, %r1, 63; +; SM35-NEXT: shr.b64 %lhs, %rd1, %amt2; +; SM35-NEXT: sub.u32 %amt2, 64, %amt2; +; SM35-NEXT: shl.b64 %rhs, %rd1, %amt2; +; SM35-NEXT: add.u64 %rd2, %lhs, %rhs; +; SM35-NEXT: } +; SM35-NEXT: st.param.b64 [func_retval0+0], %rd2; ; SM35-NEXT: ret; %val = tail call i64 @llvm.fshr.i64(i64 %a, i64 %a, i64 %n) ret i64 %val @@ -269,180 +315,35 @@ define i64 @rotr64(i64 %a, i64 %n) { define i64 @rotr64_imm(i64 %a) { ; SM20-LABEL: rotr64_imm( ; SM20: { -; SM20-NEXT: .reg .b64 %rd<5>; +; SM20-NEXT: .reg .b64 %rd<3>; ; SM20-EMPTY: ; SM20-NEXT: // %bb.0: ; SM20-NEXT: ld.param.u64 %rd1, [rotr64_imm_param_0]; -; SM20-NEXT: shl.b64 %rd2, %rd1, 62; -; SM20-NEXT: shr.u64 %rd3, %rd1, 2; -; SM20-NEXT: or.b64 %rd4, %rd3, %rd2; -; SM20-NEXT: st.param.b64 [func_retval0+0], %rd4; +; SM20-NEXT: { +; SM20-NEXT: .reg .b64 %lhs; +; SM20-NEXT: .reg .b64 %rhs; +; SM20-NEXT: shl.b64 %lhs, %rd1, 62; +; SM20-NEXT: shr.b64 %rhs, %rd1, 2; +; SM20-NEXT: add.u64 %rd2, %lhs, %rhs; +; SM20-NEXT: } +; SM20-NEXT: st.param.b64 [func_retval0+0], %rd2; ; SM20-NEXT: ret; ; ; SM35-LABEL: rotr64_imm( ; SM35: { -; SM35-NEXT: .reg .b64 %rd<5>; +; SM35-NEXT: .reg .b64 %rd<3>; ; SM35-EMPTY: ; SM35-NEXT: // %bb.0: ; SM35-NEXT: ld.param.u64 %rd1, [rotr64_imm_param_0]; -; SM35-NEXT: shl.b64 %rd2, %rd1, 62; -; SM35-NEXT: shr.u64 %rd3, %rd1, 2; -; SM35-NEXT: or.b64 %rd4, %rd3, %rd2; -; SM35-NEXT: st.param.b64 [func_retval0+0], %rd4; +; SM35-NEXT: { +; SM35-NEXT: .reg .b64 %lhs; +; SM35-NEXT: .reg .b64 %rhs; +; SM35-NEXT: shl.b64 %lhs, %rd1, 62; +; SM35-NEXT: shr.b64 %rhs, %rd1, 2; +; SM35-NEXT: add.u64 %rd2, %lhs, %rhs; +; SM35-NEXT: } +; SM35-NEXT: st.param.b64 [func_retval0+0], %rd2; ; SM35-NEXT: 
ret; %val = tail call i64 @llvm.fshr.i64(i64 %a, i64 %a, i64 66) ret i64 %val } - -define i32 @funnel_shift_right_32(i32 %a, i32 %b, i32 %c) { -; SM20-LABEL: funnel_shift_right_32( -; SM20: { -; SM20-NEXT: .reg .b32 %r<11>; -; SM20-EMPTY: -; SM20-NEXT: // %bb.0: -; SM20-NEXT: ld.param.u32 %r1, [funnel_shift_right_32_param_0]; -; SM20-NEXT: ld.param.u32 %r2, [funnel_shift_right_32_param_2]; -; SM20-NEXT: and.b32 %r3, %r2, 31; -; SM20-NEXT: ld.param.u32 %r4, [funnel_shift_right_32_param_1]; -; SM20-NEXT: shr.u32 %r5, %r4, %r3; -; SM20-NEXT: shl.b32 %r6, %r1, 1; -; SM20-NEXT: not.b32 %r7, %r2; -; SM20-NEXT: and.b32 %r8, %r7, 31; -; SM20-NEXT: shl.b32 %r9, %r6, %r8; -; SM20-NEXT: or.b32 %r10, %r9, %r5; -; SM20-NEXT: st.param.b32 [func_retval0+0], %r10; -; SM20-NEXT: ret; -; -; SM35-LABEL: funnel_shift_right_32( -; SM35: { -; SM35-NEXT: .reg .b32 %r<5>; -; SM35-EMPTY: -; SM35-NEXT: // %bb.0: -; SM35-NEXT: ld.param.u32 %r1, [funnel_shift_right_32_param_0]; -; SM35-NEXT: ld.param.u32 %r2, [funnel_shift_right_32_param_1]; -; SM35-NEXT: ld.param.u32 %r3, [funnel_shift_right_32_param_2]; -; SM35-NEXT: shf.r.wrap.b32 %r4, %r1, %r2, %r3; -; SM35-NEXT: st.param.b32 [func_retval0+0], %r4; -; SM35-NEXT: ret; - %val = call i32 @llvm.fshr.i32(i32 %a, i32 %b, i32 %c) - ret i32 %val -} - -define i32 @funnel_shift_left_32(i32 %a, i32 %b, i32 %c) { -; SM20-LABEL: funnel_shift_left_32( -; SM20: { -; SM20-NEXT: .reg .b32 %r<11>; -; SM20-EMPTY: -; SM20-NEXT: // %bb.0: -; SM20-NEXT: ld.param.u32 %r1, [funnel_shift_left_32_param_0]; -; SM20-NEXT: ld.param.u32 %r2, [funnel_shift_left_32_param_2]; -; SM20-NEXT: and.b32 %r3, %r2, 31; -; SM20-NEXT: shl.b32 %r4, %r1, %r3; -; SM20-NEXT: ld.param.u32 %r5, [funnel_shift_left_32_param_1]; -; SM20-NEXT: shr.u32 %r6, %r5, 1; -; SM20-NEXT: not.b32 %r7, %r2; -; SM20-NEXT: and.b32 %r8, %r7, 31; -; SM20-NEXT: shr.u32 %r9, %r6, %r8; -; SM20-NEXT: or.b32 %r10, %r4, %r9; -; SM20-NEXT: st.param.b32 [func_retval0+0], %r10; -; SM20-NEXT: ret; -; -; SM35-LABEL: funnel_shift_left_32( -; SM35: { -; SM35-NEXT: .reg .b32 %r<5>; -; SM35-EMPTY: -; SM35-NEXT: // %bb.0: -; SM35-NEXT: ld.param.u32 %r1, [funnel_shift_left_32_param_0]; -; SM35-NEXT: ld.param.u32 %r2, [funnel_shift_left_32_param_1]; -; SM35-NEXT: ld.param.u32 %r3, [funnel_shift_left_32_param_2]; -; SM35-NEXT: shf.l.wrap.b32 %r4, %r1, %r2, %r3; -; SM35-NEXT: st.param.b32 [func_retval0+0], %r4; -; SM35-NEXT: ret; - %val = call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 %c) - ret i32 %val -} - -define i64 @funnel_shift_right_64(i64 %a, i64 %b, i64 %c) { -; SM20-LABEL: funnel_shift_right_64( -; SM20: { -; SM20-NEXT: .reg .b32 %r<5>; -; SM20-NEXT: .reg .b64 %rd<7>; -; SM20-EMPTY: -; SM20-NEXT: // %bb.0: -; SM20-NEXT: ld.param.u64 %rd1, [funnel_shift_right_64_param_0]; -; SM20-NEXT: ld.param.u32 %r1, [funnel_shift_right_64_param_2]; -; SM20-NEXT: and.b32 %r2, %r1, 63; -; SM20-NEXT: ld.param.u64 %rd2, [funnel_shift_right_64_param_1]; -; SM20-NEXT: shr.u64 %rd3, %rd2, %r2; -; SM20-NEXT: shl.b64 %rd4, %rd1, 1; -; SM20-NEXT: not.b32 %r3, %r1; -; SM20-NEXT: and.b32 %r4, %r3, 63; -; SM20-NEXT: shl.b64 %rd5, %rd4, %r4; -; SM20-NEXT: or.b64 %rd6, %rd5, %rd3; -; SM20-NEXT: st.param.b64 [func_retval0+0], %rd6; -; SM20-NEXT: ret; -; -; SM35-LABEL: funnel_shift_right_64( -; SM35: { -; SM35-NEXT: .reg .b32 %r<5>; -; SM35-NEXT: .reg .b64 %rd<7>; -; SM35-EMPTY: -; SM35-NEXT: // %bb.0: -; SM35-NEXT: ld.param.u64 %rd1, [funnel_shift_right_64_param_0]; -; SM35-NEXT: ld.param.u32 %r1, [funnel_shift_right_64_param_2]; -; SM35-NEXT: and.b32 %r2, %r1, 63; -; 
SM35-NEXT: ld.param.u64 %rd2, [funnel_shift_right_64_param_1]; -; SM35-NEXT: shr.u64 %rd3, %rd2, %r2; -; SM35-NEXT: shl.b64 %rd4, %rd1, 1; -; SM35-NEXT: not.b32 %r3, %r1; -; SM35-NEXT: and.b32 %r4, %r3, 63; -; SM35-NEXT: shl.b64 %rd5, %rd4, %r4; -; SM35-NEXT: or.b64 %rd6, %rd5, %rd3; -; SM35-NEXT: st.param.b64 [func_retval0+0], %rd6; -; SM35-NEXT: ret; - %val = call i64 @llvm.fshr.i64(i64 %a, i64 %b, i64 %c) - ret i64 %val -} - -define i64 @funnel_shift_left_64(i64 %a, i64 %b, i64 %c) { -; SM20-LABEL: funnel_shift_left_64( -; SM20: { -; SM20-NEXT: .reg .b32 %r<5>; -; SM20-NEXT: .reg .b64 %rd<7>; -; SM20-EMPTY: -; SM20-NEXT: // %bb.0: -; SM20-NEXT: ld.param.u64 %rd1, [funnel_shift_left_64_param_0]; -; SM20-NEXT: ld.param.u32 %r1, [funnel_shift_left_64_param_2]; -; SM20-NEXT: and.b32 %r2, %r1, 63; -; SM20-NEXT: shl.b64 %rd2, %rd1, %r2; -; SM20-NEXT: ld.param.u64 %rd3, [funnel_shift_left_64_param_1]; -; SM20-NEXT: shr.u64 %rd4, %rd3, 1; -; SM20-NEXT: not.b32 %r3, %r1; -; SM20-NEXT: and.b32 %r4, %r3, 63; -; SM20-NEXT: shr.u64 %rd5, %rd4, %r4; -; SM20-NEXT: or.b64 %rd6, %rd2, %rd5; -; SM20-NEXT: st.param.b64 [func_retval0+0], %rd6; -; SM20-NEXT: ret; -; -; SM35-LABEL: funnel_shift_left_64( -; SM35: { -; SM35-NEXT: .reg .b32 %r<5>; -; SM35-NEXT: .reg .b64 %rd<7>; -; SM35-EMPTY: -; SM35-NEXT: // %bb.0: -; SM35-NEXT: ld.param.u64 %rd1, [funnel_shift_left_64_param_0]; -; SM35-NEXT: ld.param.u32 %r1, [funnel_shift_left_64_param_2]; -; SM35-NEXT: and.b32 %r2, %r1, 63; -; SM35-NEXT: shl.b64 %rd2, %rd1, %r2; -; SM35-NEXT: ld.param.u64 %rd3, [funnel_shift_left_64_param_1]; -; SM35-NEXT: shr.u64 %rd4, %rd3, 1; -; SM35-NEXT: not.b32 %r3, %r1; -; SM35-NEXT: and.b32 %r4, %r3, 63; -; SM35-NEXT: shr.u64 %rd5, %rd4, %r4; -; SM35-NEXT: or.b64 %rd6, %rd2, %rd5; -; SM35-NEXT: st.param.b64 [func_retval0+0], %rd6; -; SM35-NEXT: ret; - %val = call i64 @llvm.fshl.i64(i64 %a, i64 %b, i64 %c) - ret i64 %val -} - diff --git a/llvm/test/CodeGen/NVPTX/rotate_64.ll b/llvm/test/CodeGen/NVPTX/rotate_64.ll index 05fdb02ac7479..64659ce1b5c56 100644 --- a/llvm/test/CodeGen/NVPTX/rotate_64.ll +++ b/llvm/test/CodeGen/NVPTX/rotate_64.ll @@ -1,38 +1,25 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc < %s -march=nvptx64 | FileCheck %s ; RUN: %if ptxas %{ llc < %s -march=nvptx64 | %ptxas-verify %} declare i64 @llvm.nvvm.rotate.b64(i64, i32) declare i64 @llvm.nvvm.rotate.right.b64(i64, i32) +; CHECK: rotate64 define i64 @rotate64(i64 %a, i32 %b) { -; CHECK-LABEL: rotate64( -; CHECK: { -; CHECK-NEXT: .reg .b64 %rd<5>; -; CHECK-EMPTY: -; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.u64 %rd1, [rotate64_param_0]; -; CHECK-NEXT: shr.u64 %rd2, %rd1, 61; -; CHECK-NEXT: shl.b64 %rd3, %rd1, 3; -; CHECK-NEXT: or.b64 %rd4, %rd3, %rd2; -; CHECK-NEXT: st.param.b64 [func_retval0+0], %rd4; -; CHECK-NEXT: ret; +; CHECK: shl.b64 [[LHS:%.*]], [[RD1:%.*]], 3; +; CHECK: shr.b64 [[RHS:%.*]], [[RD1]], 61; +; CHECK: add.u64 [[RD2:%.*]], [[LHS]], [[RHS]]; +; CHECK: ret %val = tail call i64 @llvm.nvvm.rotate.b64(i64 %a, i32 3) ret i64 %val } +; CHECK: rotateright64 define i64 @rotateright64(i64 %a, i32 %b) { -; CHECK-LABEL: rotateright64( -; CHECK: { -; CHECK-NEXT: .reg .b64 %rd<5>; -; CHECK-EMPTY: -; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.u64 %rd1, [rotateright64_param_0]; -; CHECK-NEXT: shl.b64 %rd2, %rd1, 61; -; CHECK-NEXT: shr.u64 %rd3, %rd1, 3; -; CHECK-NEXT: or.b64 %rd4, %rd3, %rd2; -; CHECK-NEXT: st.param.b64 [func_retval0+0], %rd4; -; CHECK-NEXT: ret; +; CHECK: shl.b64 
[[LHS:%.*]], [[RD1:%.*]], 61; +; CHECK: shr.b64 [[RHS:%.*]], [[RD1]], 3; +; CHECK: add.u64 [[RD2:%.*]], [[LHS]], [[RHS]]; +; CHECK: ret %val = tail call i64 @llvm.nvvm.rotate.right.b64(i64 %a, i32 3) ret i64 %val } From 97189492a1a75d39c09b0a54982f2a028c9bd652 Mon Sep 17 00:00:00 2001 From: Sean Perry Date: Wed, 25 Sep 2024 10:51:55 -0400 Subject: [PATCH 184/212] The real option name and not the alias used is displayed in msgs when using a config file (#107613) An example of this is the -mpure-code option. Without a config file being used, an error message will print `-mpure-code`. But if a config file is used, the error message will print `-mexecute-only`. --- clang/lib/Driver/Driver.cpp | 11 +++++++++++ clang/test/Driver/arm-execute-only.c | 3 +++ 2 files changed, 14 insertions(+) diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp index 44548fa9d706f..d0c8bdba0ede9 100644 --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -1005,6 +1005,17 @@ static void appendOneArg(InputArgList &Args, const Arg *Opt, Copy->setOwnsValues(Opt->getOwnsValues()); Opt->setOwnsValues(false); Args.append(Copy); + if (Opt->getAlias()) { + const Arg *Alias = Opt->getAlias(); + unsigned Index = Args.MakeIndex(Alias->getSpelling()); + auto AliasCopy = std::make_unique(Alias->getOption(), + Args.getArgString(Index), Index); + AliasCopy->getValues() = Alias->getValues(); + AliasCopy->setOwnsValues(false); + if (Alias->isClaimed()) + AliasCopy->claim(); + Copy->setAlias(std::move(AliasCopy)); + } } bool Driver::readConfigFile(StringRef FileName, diff --git a/clang/test/Driver/arm-execute-only.c b/clang/test/Driver/arm-execute-only.c index a9bf1656fd27e..d654ec364a87f 100644 --- a/clang/test/Driver/arm-execute-only.c +++ b/clang/test/Driver/arm-execute-only.c @@ -19,6 +19,9 @@ // RUN: not %clang -### --target=arm-arm-none-eabi -march=armv8-m.main -mpure-code -mno-movt %s 2>&1 \ // RUN: | FileCheck %s -check-prefix CHECK-PURE-CODE-NO-MOVT +// RUN: echo "-DABC" > %t.cfg +// RUN: not %clang -### --target=arm-arm-none-eabi -march=armv8-m.main -mpure-code -mno-movt --config %t.cfg %s 2>&1 \ +// RUN: | FileCheck %s -check-prefix CHECK-PURE-CODE-NO-MOVT // CHECK-PURE-CODE-NO-MOVT: error: option '-mpure-code' cannot be specified with '-mno-movt' // RUN: not %clang -### --target=arm-arm-none-eabi -march=armv6-m -mexecute-only -fropi %s 2>&1 \ From 11c423f9bebc3be27722225ca8120e8775be836c Mon Sep 17 00:00:00 2001 From: Chris Cotter Date: Wed, 25 Sep 2024 10:54:31 -0400 Subject: [PATCH 185/212] [clang-tidy] Add support for bsl::optional (#101450) --- clang-tools-extra/docs/ReleaseNotes.rst | 5 + .../bugprone/unchecked-optional-access.rst | 7 +- .../bde/types/bdlb_nullablevalue.h | 38 ++++++++ .../bde/types/bsl_optional.h | 75 +++++++++++++++ .../bugprone/unchecked-optional-access.cpp | 91 +++++++++++++++++++ .../Models/UncheckedOptionalAccessModel.cpp | 67 +++++++++++--- 6 files changed, 267 insertions(+), 16 deletions(-) create mode 100644 clang-tools-extra/test/clang-tidy/checkers/bugprone/Inputs/unchecked-optional-access/bde/types/bdlb_nullablevalue.h create mode 100644 clang-tools-extra/test/clang-tidy/checkers/bugprone/Inputs/unchecked-optional-access/bde/types/bsl_optional.h diff --git a/clang-tools-extra/docs/ReleaseNotes.rst b/clang-tools-extra/docs/ReleaseNotes.rst index 8f7b0b5333f3a..44b1f8c07edd3 100644 --- a/clang-tools-extra/docs/ReleaseNotes.rst +++ b/clang-tools-extra/docs/ReleaseNotes.rst @@ -130,6 +130,11 @@ Changes in existing checks usages of ``sizeof()``, 
``alignof()``, and ``offsetof()`` when adding or subtracting from a pointer.
+- Improved :doc:`bugprone-unchecked-optional-access
+  <clang-tidy/checks/bugprone/unchecked-optional-access>` to support
+  `bsl::optional` and `bdlb::NullableValue` from
+  <https://github.com/bloomberg/bde>_.
+
 - Improved :doc:`cert-flp30-c <clang-tidy/checks/cert/flp30-c>` check to fix false positive that floating point variable is only used in increment expression.
diff --git a/clang-tools-extra/docs/clang-tidy/checks/bugprone/unchecked-optional-access.rst b/clang-tools-extra/docs/clang-tidy/checks/bugprone/unchecked-optional-access.rst
index 5a6aaa077d9bf..97fe37b535356 100644
--- a/clang-tools-extra/docs/clang-tidy/checks/bugprone/unchecked-optional-access.rst
+++ b/clang-tools-extra/docs/clang-tidy/checks/bugprone/unchecked-optional-access.rst
@@ -8,9 +8,10 @@ results. Therefore, it may be more resource intensive (RAM, CPU) than the
 average clang-tidy check.
 
 This check identifies unsafe accesses to values contained in
-``std::optional``, ``absl::optional``, ``base::Optional``, or
-``folly::Optional`` objects. Below we will refer to all these types
-collectively as ``optional``.
+``std::optional``, ``absl::optional``, ``base::Optional``,
+``folly::Optional``, ``bsl::optional``, or
+``BloombergLP::bdlb::NullableValue`` objects. Below we will refer to all these
+types collectively as ``optional``.
 
 An access to the value of an ``optional`` occurs when one of its ``value``,
 ``operator*``, or ``operator->`` member functions is invoked. To align with
diff --git a/clang-tools-extra/test/clang-tidy/checkers/bugprone/Inputs/unchecked-optional-access/bde/types/bdlb_nullablevalue.h b/clang-tools-extra/test/clang-tidy/checkers/bugprone/Inputs/unchecked-optional-access/bde/types/bdlb_nullablevalue.h
new file mode 100644
index 0000000000000..4411bcfd60a74
--- /dev/null
+++ b/clang-tools-extra/test/clang-tidy/checkers/bugprone/Inputs/unchecked-optional-access/bde/types/bdlb_nullablevalue.h
@@ -0,0 +1,38 @@
+#ifndef LLVM_CLANG_TOOLS_EXTRA_TEST_CLANG_TIDY_CHECKERS_INPUTS_BDE_TYPES_NULLABLEVALUE_H_
+#define LLVM_CLANG_TOOLS_EXTRA_TEST_CLANG_TIDY_CHECKERS_INPUTS_BDE_TYPES_NULLABLEVALUE_H_
+
+#include "bsl_optional.h"
+
+/// Mock of `bdlb::NullableValue`.
+namespace BloombergLP::bdlb {
+
+template <typename T>
+class NullableValue : public bsl::optional<T> {
+public:
+  constexpr NullableValue() noexcept;
+
+  constexpr NullableValue(bsl::nullopt_t) noexcept;
+
+  NullableValue(const NullableValue &) = default;
+
+  NullableValue(NullableValue &&) = default;
+
+  const T &value() const &;
+  T &value() &;
+
+  // 'operator bool' is inherited from bsl::optional
+
+  constexpr bool isNull() const noexcept;
+
+  template <typename U>
+  constexpr T valueOr(U &&v) const &;
+
+  // 'reset' is inherited from bsl::optional
+
+  template <typename U> NullableValue &operator=(const U &u);
+};
+
+
+} // namespace BloombergLP::bdlb
+
+#endif // LLVM_CLANG_TOOLS_EXTRA_TEST_CLANG_TIDY_CHECKERS_INPUTS_BDE_TYPES_NULLABLEVALUE_H_
diff --git a/clang-tools-extra/test/clang-tidy/checkers/bugprone/Inputs/unchecked-optional-access/bde/types/bsl_optional.h b/clang-tools-extra/test/clang-tidy/checkers/bugprone/Inputs/unchecked-optional-access/bde/types/bsl_optional.h
new file mode 100644
index 0000000000000..7e1a129e04a55
--- /dev/null
+++ b/clang-tools-extra/test/clang-tidy/checkers/bugprone/Inputs/unchecked-optional-access/bde/types/bsl_optional.h
@@ -0,0 +1,75 @@
+#ifndef LLVM_CLANG_TOOLS_EXTRA_TEST_CLANG_TIDY_CHECKERS_INPUTS_BDE_TYPES_OPTIONAL_H_
+#define LLVM_CLANG_TOOLS_EXTRA_TEST_CLANG_TIDY_CHECKERS_INPUTS_BDE_TYPES_OPTIONAL_H_
+
+/// Mock of `bsl::optional`.
+namespace bsl {
+
+// clang-format off
+template <typename T> struct remove_reference      { using type = T; };
+template <typename T> struct remove_reference<T&>  { using type = T; };
+template <typename T> struct remove_reference<T&&> { using type = T; };
+// clang-format on
+
+template <typename T>
+using remove_reference_t = typename remove_reference<T>::type;
+
+template <typename T>
+constexpr T &&forward(remove_reference_t<T> &t) noexcept;
+
+template <typename T>
+constexpr T &&forward(remove_reference_t<T> &&t) noexcept;
+
+template <typename T>
+constexpr remove_reference_t<T> &&move(T &&x);
+
+struct nullopt_t {
+  constexpr explicit nullopt_t() {}
+};
+
+constexpr nullopt_t nullopt;
+
+template <typename T>
+class optional {
+public:
+  constexpr optional() noexcept;
+
+  constexpr optional(nullopt_t) noexcept;
+
+  optional(const optional &) = default;
+
+  optional(optional &&) = default;
+
+  const T &operator*() const &;
+  T &operator*() &;
+  const T &&operator*() const &&;
+  T &&operator*() &&;
+
+  const T *operator->() const;
+  T *operator->();
+
+  const T &value() const &;
+  T &value() &;
+  const T &&value() const &&;
+  T &&value() &&;
+
+  constexpr explicit operator bool() const noexcept;
+  constexpr bool has_value() const noexcept;
+
+  template <typename U>
+  constexpr T value_or(U &&v) const &;
+  template <typename U>
+  T value_or(U &&v) &&;
+
+  template <typename... Args>
+  T &emplace(Args &&...args);
+
+  void reset() noexcept;
+
+  void swap(optional &rhs) noexcept;
+
+  template <typename U> optional &operator=(const U &u);
+};
+
+} // namespace bsl
+
+#endif // LLVM_CLANG_TOOLS_EXTRA_TEST_CLANG_TIDY_CHECKERS_INPUTS_BDE_TYPES_OPTIONAL_H_
diff --git a/clang-tools-extra/test/clang-tidy/checkers/bugprone/unchecked-optional-access.cpp b/clang-tools-extra/test/clang-tidy/checkers/bugprone/unchecked-optional-access.cpp
index 13a3ff52f3ebc..3167b85f0e024 100644
--- a/clang-tools-extra/test/clang-tidy/checkers/bugprone/unchecked-optional-access.cpp
+++ b/clang-tools-extra/test/clang-tidy/checkers/bugprone/unchecked-optional-access.cpp
@@ -2,6 +2,8 @@
 
 #include "absl/types/optional.h"
 #include "folly/types/Optional.h"
+#include "bde/types/bsl_optional.h"
+#include "bde/types/bdlb_nullablevalue.h"
 
 void unchecked_value_access(const absl::optional<int> &opt) {
   opt.value();
@@ -50,6 +52,95 @@ void folly_checked_access(const folly::Optional<int> &opt) {
   }
 }
 
+void bsl_optional_unchecked_value_access(const bsl::optional<int> &opt) {
+  opt.value();
+  // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: unchecked access to optional value [bugprone-unchecked-optional-access]
+
+  int x = *opt;
+  // CHECK-MESSAGES: :[[@LINE-1]]:12: warning: unchecked access to optional value [bugprone-unchecked-optional-access]
+
+  if (!opt) {
+    return;
+  }
+
+  opt.value();
+  x = *opt;
+}
+
+void bsl_optional_checked_access(const bsl::optional<int> &opt) {
+  if (opt.has_value()) {
+    opt.value();
+  }
+  if (opt) {
+    opt.value();
+  }
+}
+
+void bsl_optional_value_after_swap(bsl::optional<int> &opt1, bsl::optional<int> &opt2) {
+  if (opt1) {
+    opt1.swap(opt2);
+    opt1.value();
+    // CHECK-MESSAGES: :[[@LINE-1]]:5: warning: unchecked access to optional value
+  }
+}
+
+void nullable_value_unchecked_value_access(const BloombergLP::bdlb::NullableValue<int> &opt) {
+  opt.value();
+  // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: unchecked access to optional value [bugprone-unchecked-optional-access]
+
+  int x = *opt;
+  // CHECK-MESSAGES: :[[@LINE-1]]:12: warning: unchecked access to optional value [bugprone-unchecked-optional-access]
+
+  if (opt.isNull()) {
+    opt.value();
+  }
+  // CHECK-MESSAGES: :[[@LINE-2]]:5: warning: unchecked access to optional value [bugprone-unchecked-optional-access]
+
+  if (!opt) {
+    opt.value();
+  }
+  // CHECK-MESSAGES: :[[@LINE-2]]:5: warning: unchecked access to optional value [bugprone-unchecked-optional-access]
+
+  if (!opt) {
+    return;
+  }
+
+  opt.value();
+  x = *opt;
+}
+
+void nullable_value_optional_checked_access(const BloombergLP::bdlb::NullableValue<int> &opt) {
+  if (opt.has_value()) {
+    opt.value();
+  }
+  if (opt) {
+    opt.value();
+  }
+  if (!opt.isNull()) {
+    opt.value();
+  }
+}
+
+void nullable_value_emplaced(BloombergLP::bdlb::NullableValue<int> &opt) {
+  opt.value();
+  // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: unchecked access to optional value [bugprone-unchecked-optional-access]
+
+  opt.emplace(1);
+  opt.value();
+
+  opt.reset();
+  opt.value();
+  // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: unchecked access to optional value [bugprone-unchecked-optional-access]
+}
+
+void nullable_value_after_swap(BloombergLP::bdlb::NullableValue<int> &opt1, BloombergLP::bdlb::NullableValue<int> &opt2) {
+  if (opt1) {
+    opt1.swap(opt2);
+    opt1.value();
+    // CHECK-MESSAGES: :[[@LINE-1]]:5: warning: unchecked access to optional value
+  }
+}
+
 template <typename T>
 void function_template_without_user(const absl::optional<int> &opt) {
   opt.value(); // no-warning
diff --git a/clang/lib/Analysis/FlowSensitive/Models/UncheckedOptionalAccessModel.cpp b/clang/lib/Analysis/FlowSensitive/Models/UncheckedOptionalAccessModel.cpp
index 0707aa662e4cc..70ffe92753e05 100644
--- a/clang/lib/Analysis/FlowSensitive/Models/UncheckedOptionalAccessModel.cpp
+++ b/clang/lib/Analysis/FlowSensitive/Models/UncheckedOptionalAccessModel.cpp
@@ -38,10 +38,25 @@ namespace clang {
 namespace dataflow {
 
-static bool isTopLevelNamespaceWithName(const NamespaceDecl &NS,
-                                        llvm::StringRef Name) {
-  return NS.getDeclName().isIdentifier() && NS.getName() == Name &&
-         NS.getParent() != nullptr && NS.getParent()->isTranslationUnit();
+// Note: the Names appear in reverse order. E.g., to check
+// if NS is foo::bar::, call isFullyQualifiedNamespaceEqualTo(NS, "bar", "foo")
+template <class... NameTypes>
+static bool isFullyQualifiedNamespaceEqualTo(const NamespaceDecl &NS,
+                                             llvm::StringRef Name,
+                                             NameTypes... Names) {
+  if (!(NS.getDeclName().isIdentifier() && NS.getName() == Name &&
+        NS.getParent() != nullptr))
+    return false;
+
+  if constexpr (sizeof...(NameTypes) > 0) {
+    if (NS.getParent()->isTranslationUnit())
+      return false;
+    if (const auto *NextNS = dyn_cast_or_null<NamespaceDecl>(NS.getParent()))
+      return isFullyQualifiedNamespaceEqualTo(*NextNS, Names...);
+    return false;
+  } else {
+    return NS.getParent()->isTranslationUnit();
+  }
 }
 
 static bool hasOptionalClassName(const CXXRecordDecl &RD) {
@@ -50,15 +65,23 @@ static bool hasOptionalClassName(const CXXRecordDecl &RD) {
 
   if (RD.getName() == "optional") {
     if (const auto *N = dyn_cast_or_null<NamespaceDecl>(RD.getDeclContext()))
-      return N->isStdNamespace() || isTopLevelNamespaceWithName(*N, "absl");
+      return N->isStdNamespace() ||
+             isFullyQualifiedNamespaceEqualTo(*N, "absl") ||
+             isFullyQualifiedNamespaceEqualTo(*N, "bsl");
     return false;
   }
 
   if (RD.getName() == "Optional") {
     // Check whether namespace is "::base" or "::folly".
    const auto *N = dyn_cast_or_null<NamespaceDecl>(RD.getDeclContext());
-    return N != nullptr && (isTopLevelNamespaceWithName(*N, "base") ||
-                            isTopLevelNamespaceWithName(*N, "folly"));
+    return N != nullptr && (isFullyQualifiedNamespaceEqualTo(*N, "base") ||
+                            isFullyQualifiedNamespaceEqualTo(*N, "folly"));
+  }
+
+  if (RD.getName() == "NullableValue") {
+    const auto *N = dyn_cast_or_null<NamespaceDecl>(RD.getDeclContext());
+    return N != nullptr &&
+           isFullyQualifiedNamespaceEqualTo(*N, "bdlb", "BloombergLP");
   }
 
   return false;
@@ -195,22 +218,25 @@ auto isOptionalOperatorCallWithName(
 }
 
 auto isMakeOptionalCall() {
-  return callExpr(callee(functionDecl(hasAnyName(
-                      "std::make_optional", "base::make_optional",
-                      "absl::make_optional", "folly::make_optional"))),
-                  hasOptionalType());
+  return callExpr(
+      callee(functionDecl(hasAnyName(
+          "std::make_optional", "base::make_optional", "absl::make_optional",
+          "folly::make_optional", "bsl::make_optional"))),
+      hasOptionalType());
 }
 
 auto nulloptTypeDecl() {
   return namedDecl(hasAnyName("std::nullopt_t", "absl::nullopt_t",
-                              "base::nullopt_t", "folly::None"));
+                              "base::nullopt_t", "folly::None",
+                              "bsl::nullopt_t"));
 }
 
 auto hasNulloptType() { return hasType(nulloptTypeDecl()); }
 
 auto inPlaceClass() {
   return recordDecl(hasAnyName("std::in_place_t", "absl::in_place_t",
-                               "base::in_place_t", "folly::in_place_t"));
+                               "base::in_place_t", "folly::in_place_t",
+                               "bsl::in_place_t"));
 }
 
 auto isOptionalNulloptConstructor() {
@@ -415,6 +441,15 @@ void transferOptionalHasValueCall(const CXXMemberCallExpr *CallExpr,
   }
 }
 
+void transferOptionalIsNullCall(const CXXMemberCallExpr *CallExpr,
+                                const MatchFinder::MatchResult &,
+                                LatticeTransferState &State) {
+  if (auto *HasValueVal = getHasValue(
+          State.Env, getImplicitObjectLocation(*CallExpr, State.Env))) {
+    State.Env.setValue(*CallExpr, State.Env.makeNot(*HasValueVal));
+  }
+}
+
 /// `ModelPred` builds a logical formula relating the predicate in
 /// `ValueOrPredExpr` to the optional's `has_value` property.
 void transferValueOrImpl(
@@ -784,6 +819,12 @@ auto buildTransferMatchSwitch() {
           isOptionalMemberCallWithNameMatcher(hasName("operator bool")),
           transferOptionalHasValueCall)
 
+      // NullableValue::isNull
+      // Only NullableValue has isNull
+      .CaseOfCFGStmt<CXXMemberCallExpr>(
+          isOptionalMemberCallWithNameMatcher(hasName("isNull")),
+          transferOptionalIsNullCall)
+
       // optional::emplace
       .CaseOfCFGStmt<CXXMemberCallExpr>(
           isOptionalMemberCallWithNameMatcher(hasName("emplace")),
From 88945db4dfaefe65535ec0670e0e3d238667446b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Juan=20Manuel=20Martinez=20Caama=C3=B1o?=
Date: Wed, 25 Sep 2024 16:57:08 +0200
Subject: [PATCH 186/212] [AMDGPU][SIPreEmitPeephole] pre-commit tests:
 mustRetainExeczBranch: use a cost model (#109816)

---
 .../AMDGPU/amdgpu-demote-scc-branches.ll      | 365 ++++++++++++++++++
 1 file changed, 365 insertions(+)
 create mode 100644 llvm/test/CodeGen/AMDGPU/amdgpu-demote-scc-branches.ll

diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-demote-scc-branches.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-demote-scc-branches.ll
new file mode 100644
index 0000000000000..9319f0d3f5d40
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-demote-scc-branches.ll
@@ -0,0 +1,365 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=amdgcn -mcpu=gfx90a < %s | FileCheck -check-prefixes=GFX9 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GFX10,GFX1010 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1030 < %s | FileCheck -check-prefixes=GFX10,GFX1030 %s
+
+define void @uniform_br_no_metadata(i32 noundef inreg %value, ptr addrspace(8) nocapture writeonly inreg %res, i32 noundef inreg %v_offset, i32 noundef inreg %0, i32 noundef inreg %flag) {
+; GFX9-LABEL: uniform_br_no_metadata:
+; GFX9:       ; %bb.0: ; %entry
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    s_cmp_lt_i32 s21, 1
+; GFX9-NEXT:    s_cbranch_scc1 .LBB0_2
+; GFX9-NEXT:  ; %bb.1: ; %if.then
+; GFX9-NEXT:    s_mov_b32 s11, s18
+; GFX9-NEXT:    s_mov_b32 s10, s17
+; GFX9-NEXT:    s_mov_b32 s9, s16
+; GFX9-NEXT:    s_mov_b32 s8, s7
+; GFX9-NEXT:    v_mov_b32_e32 v0, s6
+; GFX9-NEXT:    v_mov_b32_e32 v1, s19
+; GFX9-NEXT:    buffer_store_dword v0, v1, s[8:11], 0 offen
+; GFX9-NEXT:  .LBB0_2: ; %if.end
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: uniform_br_no_metadata:
+; GFX10:       ; %bb.0: ; %entry
+; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT:    s_cmp_lt_i32 s21, 1
+; GFX10-NEXT:    s_cbranch_scc1 .LBB0_2
+; GFX10-NEXT:  ; %bb.1: ; %if.then
+; GFX10-NEXT:    v_mov_b32_e32 v0, s6
+; GFX10-NEXT:    v_mov_b32_e32 v1, s19
+; GFX10-NEXT:    s_mov_b32 s11, s18
+; GFX10-NEXT:    s_mov_b32 s10, s17
+; GFX10-NEXT:    s_mov_b32 s9, s16
+; GFX10-NEXT:    s_mov_b32 s8, s7
+; GFX10-NEXT:    buffer_store_dword v0, v1, s[8:11], 0 offen
+; GFX10-NEXT:  .LBB0_2: ; %if.end
+; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT:    s_setpc_b64 s[30:31]
+entry:
+  %cmp = icmp sgt i32 %flag, 0
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:
+  tail call void @llvm.amdgcn.raw.ptr.buffer.store.i32(i32 %value, ptr addrspace(8) %res, i32 %v_offset, i32 0, i32 0)
+  br label %if.end
+
+if.end:
+  call void @llvm.amdgcn.s.waitcnt(i32 0)
+  ret void
+}
+
+define void @uniform_br_unprofitable(i32 noundef inreg %value, ptr addrspace(8) nocapture writeonly inreg %res, i32 noundef inreg %v_offset, i32 noundef inreg %0, i32 noundef inreg %flag) {
+; GFX9-LABEL: uniform_br_unprofitable:
+; GFX9:       ; %bb.0: ; %entry
+; GFX9-NEXT:    s_waitcnt
vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: s_cmp_lt_i32 s21, 1 +; GFX9-NEXT: s_cbranch_scc1 .LBB1_2 +; GFX9-NEXT: ; %bb.1: ; %if.then +; GFX9-NEXT: s_mov_b32 s11, s18 +; GFX9-NEXT: s_mov_b32 s10, s17 +; GFX9-NEXT: s_mov_b32 s9, s16 +; GFX9-NEXT: s_mov_b32 s8, s7 +; GFX9-NEXT: v_mov_b32_e32 v0, s6 +; GFX9-NEXT: v_mov_b32_e32 v1, s19 +; GFX9-NEXT: buffer_store_dword v0, v1, s[8:11], 0 offen +; GFX9-NEXT: .LBB1_2: ; %if.end +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: uniform_br_unprofitable: +; GFX10: ; %bb.0: ; %entry +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_cmp_lt_i32 s21, 1 +; GFX10-NEXT: s_cbranch_scc1 .LBB1_2 +; GFX10-NEXT: ; %bb.1: ; %if.then +; GFX10-NEXT: v_mov_b32_e32 v0, s6 +; GFX10-NEXT: v_mov_b32_e32 v1, s19 +; GFX10-NEXT: s_mov_b32 s11, s18 +; GFX10-NEXT: s_mov_b32 s10, s17 +; GFX10-NEXT: s_mov_b32 s9, s16 +; GFX10-NEXT: s_mov_b32 s8, s7 +; GFX10-NEXT: buffer_store_dword v0, v1, s[8:11], 0 offen +; GFX10-NEXT: .LBB1_2: ; %if.end +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_setpc_b64 s[30:31] +entry: + %cmp = icmp sgt i32 %flag, 0 + br i1 %cmp, label %if.then, label %if.end, !prof !0 + +if.then: + tail call void @llvm.amdgcn.raw.ptr.buffer.store.i32(i32 %value, ptr addrspace(8) %res, i32 %v_offset, i32 0, i32 0) + br label %if.end + +if.end: + call void @llvm.amdgcn.s.waitcnt(i32 0) + ret void +} + +define void @uniform_br_profitable(i32 noundef inreg %value, ptr addrspace(8) nocapture writeonly inreg %res, i32 noundef inreg %v_offset, i32 noundef inreg %0, i32 noundef inreg %flag) { +; GFX9-LABEL: uniform_br_profitable: +; GFX9: ; %bb.0: ; %entry +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: s_cmp_lt_i32 s21, 1 +; GFX9-NEXT: s_cbranch_scc1 .LBB2_2 +; GFX9-NEXT: ; %bb.1: ; %if.then +; GFX9-NEXT: s_mov_b32 s11, s18 +; GFX9-NEXT: s_mov_b32 s10, s17 +; GFX9-NEXT: s_mov_b32 s9, s16 +; GFX9-NEXT: s_mov_b32 s8, s7 +; GFX9-NEXT: v_mov_b32_e32 v0, s6 +; GFX9-NEXT: v_mov_b32_e32 v1, s19 +; GFX9-NEXT: buffer_store_dword v0, v1, s[8:11], 0 offen +; GFX9-NEXT: .LBB2_2: ; %if.end +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: uniform_br_profitable: +; GFX10: ; %bb.0: ; %entry +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_cmp_lt_i32 s21, 1 +; GFX10-NEXT: s_cbranch_scc1 .LBB2_2 +; GFX10-NEXT: ; %bb.1: ; %if.then +; GFX10-NEXT: v_mov_b32_e32 v0, s6 +; GFX10-NEXT: v_mov_b32_e32 v1, s19 +; GFX10-NEXT: s_mov_b32 s11, s18 +; GFX10-NEXT: s_mov_b32 s10, s17 +; GFX10-NEXT: s_mov_b32 s9, s16 +; GFX10-NEXT: s_mov_b32 s8, s7 +; GFX10-NEXT: buffer_store_dword v0, v1, s[8:11], 0 offen +; GFX10-NEXT: .LBB2_2: ; %if.end +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_setpc_b64 s[30:31] +entry: + %cmp = icmp sgt i32 %flag, 0 + br i1 %cmp, label %if.then, label %if.end, !prof !1 + +if.then: + tail call void @llvm.amdgcn.raw.ptr.buffer.store.i32(i32 %value, ptr addrspace(8) %res, i32 %v_offset, i32 0, i32 0) + br label %if.end + +if.end: + call void @llvm.amdgcn.s.waitcnt(i32 0) + ret void +} + +define void @divergent_br_no_metadata(i32 noundef inreg %value, ptr addrspace(8) nocapture writeonly inreg %res, i32 noundef inreg %v_offset, i32 noundef inreg %0, i32 noundef %flag) { +; GFX9-LABEL: divergent_br_no_metadata: +; GFX9: ; %bb.0: ; %entry +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_cmp_lt_i32_e32 vcc, 0, v0 +; GFX9-NEXT: 
s_and_saveexec_b64 s[4:5], vcc +; GFX9-NEXT: s_cbranch_execz .LBB3_2 +; GFX9-NEXT: ; %bb.1: ; %if.then +; GFX9-NEXT: s_mov_b32 s11, s18 +; GFX9-NEXT: s_mov_b32 s10, s17 +; GFX9-NEXT: s_mov_b32 s9, s16 +; GFX9-NEXT: s_mov_b32 s8, s7 +; GFX9-NEXT: v_mov_b32_e32 v0, s6 +; GFX9-NEXT: v_mov_b32_e32 v1, s19 +; GFX9-NEXT: buffer_store_dword v0, v1, s[8:11], 0 offen +; GFX9-NEXT: .LBB3_2: ; %if.end +; GFX9-NEXT: s_or_b64 exec, exec, s[4:5] +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX1010-LABEL: divergent_br_no_metadata: +; GFX1010: ; %bb.0: ; %entry +; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1010-NEXT: v_cmp_lt_i32_e32 vcc_lo, 0, v0 +; GFX1010-NEXT: s_and_saveexec_b32 s4, vcc_lo +; GFX1010-NEXT: s_cbranch_execz .LBB3_2 +; GFX1010-NEXT: ; %bb.1: ; %if.then +; GFX1010-NEXT: v_mov_b32_e32 v0, s6 +; GFX1010-NEXT: v_mov_b32_e32 v1, s19 +; GFX1010-NEXT: s_mov_b32 s11, s18 +; GFX1010-NEXT: s_mov_b32 s10, s17 +; GFX1010-NEXT: s_mov_b32 s9, s16 +; GFX1010-NEXT: s_mov_b32 s8, s7 +; GFX1010-NEXT: buffer_store_dword v0, v1, s[8:11], 0 offen +; GFX1010-NEXT: .LBB3_2: ; %if.end +; GFX1010-NEXT: s_waitcnt_depctr 0xffe3 +; GFX1010-NEXT: s_or_b32 exec_lo, exec_lo, s4 +; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1010-NEXT: s_setpc_b64 s[30:31] +; +; GFX1030-LABEL: divergent_br_no_metadata: +; GFX1030: ; %bb.0: ; %entry +; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1030-NEXT: s_mov_b32 s4, exec_lo +; GFX1030-NEXT: v_cmpx_lt_i32_e32 0, v0 +; GFX1030-NEXT: s_cbranch_execz .LBB3_2 +; GFX1030-NEXT: ; %bb.1: ; %if.then +; GFX1030-NEXT: v_mov_b32_e32 v0, s6 +; GFX1030-NEXT: v_mov_b32_e32 v1, s19 +; GFX1030-NEXT: s_mov_b32 s11, s18 +; GFX1030-NEXT: s_mov_b32 s10, s17 +; GFX1030-NEXT: s_mov_b32 s9, s16 +; GFX1030-NEXT: s_mov_b32 s8, s7 +; GFX1030-NEXT: buffer_store_dword v0, v1, s[8:11], 0 offen +; GFX1030-NEXT: .LBB3_2: ; %if.end +; GFX1030-NEXT: s_or_b32 exec_lo, exec_lo, s4 +; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1030-NEXT: s_setpc_b64 s[30:31] +entry: + %cmp = icmp sgt i32 %flag, 0 + br i1 %cmp, label %if.then, label %if.end + +if.then: + tail call void @llvm.amdgcn.raw.ptr.buffer.store.i32(i32 %value, ptr addrspace(8) %res, i32 %v_offset, i32 0, i32 0) + br label %if.end + +if.end: + call void @llvm.amdgcn.s.waitcnt(i32 0) + ret void +} + +define void @divergent_br_unprofitable(i32 noundef inreg %value, ptr addrspace(8) nocapture writeonly inreg %res, i32 noundef inreg %v_offset, i32 noundef inreg %0, i32 noundef %flag) { +; GFX9-LABEL: divergent_br_unprofitable: +; GFX9: ; %bb.0: ; %entry +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_cmp_lt_i32_e32 vcc, 0, v0 +; GFX9-NEXT: s_and_saveexec_b64 s[4:5], vcc +; GFX9-NEXT: s_cbranch_execz .LBB4_2 +; GFX9-NEXT: ; %bb.1: ; %if.then +; GFX9-NEXT: s_mov_b32 s11, s18 +; GFX9-NEXT: s_mov_b32 s10, s17 +; GFX9-NEXT: s_mov_b32 s9, s16 +; GFX9-NEXT: s_mov_b32 s8, s7 +; GFX9-NEXT: v_mov_b32_e32 v0, s6 +; GFX9-NEXT: v_mov_b32_e32 v1, s19 +; GFX9-NEXT: buffer_store_dword v0, v1, s[8:11], 0 offen +; GFX9-NEXT: .LBB4_2: ; %if.end +; GFX9-NEXT: s_or_b64 exec, exec, s[4:5] +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX1010-LABEL: divergent_br_unprofitable: +; GFX1010: ; %bb.0: ; %entry +; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1010-NEXT: v_cmp_lt_i32_e32 vcc_lo, 0, v0 +; GFX1010-NEXT: s_and_saveexec_b32 s4, vcc_lo +; GFX1010-NEXT: s_cbranch_execz .LBB4_2 +; 
GFX1010-NEXT: ; %bb.1: ; %if.then +; GFX1010-NEXT: v_mov_b32_e32 v0, s6 +; GFX1010-NEXT: v_mov_b32_e32 v1, s19 +; GFX1010-NEXT: s_mov_b32 s11, s18 +; GFX1010-NEXT: s_mov_b32 s10, s17 +; GFX1010-NEXT: s_mov_b32 s9, s16 +; GFX1010-NEXT: s_mov_b32 s8, s7 +; GFX1010-NEXT: buffer_store_dword v0, v1, s[8:11], 0 offen +; GFX1010-NEXT: .LBB4_2: ; %if.end +; GFX1010-NEXT: s_waitcnt_depctr 0xffe3 +; GFX1010-NEXT: s_or_b32 exec_lo, exec_lo, s4 +; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1010-NEXT: s_setpc_b64 s[30:31] +; +; GFX1030-LABEL: divergent_br_unprofitable: +; GFX1030: ; %bb.0: ; %entry +; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1030-NEXT: s_mov_b32 s4, exec_lo +; GFX1030-NEXT: v_cmpx_lt_i32_e32 0, v0 +; GFX1030-NEXT: s_cbranch_execz .LBB4_2 +; GFX1030-NEXT: ; %bb.1: ; %if.then +; GFX1030-NEXT: v_mov_b32_e32 v0, s6 +; GFX1030-NEXT: v_mov_b32_e32 v1, s19 +; GFX1030-NEXT: s_mov_b32 s11, s18 +; GFX1030-NEXT: s_mov_b32 s10, s17 +; GFX1030-NEXT: s_mov_b32 s9, s16 +; GFX1030-NEXT: s_mov_b32 s8, s7 +; GFX1030-NEXT: buffer_store_dword v0, v1, s[8:11], 0 offen +; GFX1030-NEXT: .LBB4_2: ; %if.end +; GFX1030-NEXT: s_or_b32 exec_lo, exec_lo, s4 +; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1030-NEXT: s_setpc_b64 s[30:31] +entry: + %cmp = icmp sgt i32 %flag, 0 + br i1 %cmp, label %if.then, label %if.end, !prof !0 + +if.then: + tail call void @llvm.amdgcn.raw.ptr.buffer.store.i32(i32 %value, ptr addrspace(8) %res, i32 %v_offset, i32 0, i32 0) + br label %if.end + +if.end: + call void @llvm.amdgcn.s.waitcnt(i32 0) + ret void +} + +define void @divergent_br_profitable(i32 noundef inreg %value, ptr addrspace(8) nocapture writeonly inreg %res, i32 noundef inreg %v_offset, i32 noundef inreg %0, i32 noundef %flag) { +; GFX9-LABEL: divergent_br_profitable: +; GFX9: ; %bb.0: ; %entry +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_cmp_lt_i32_e32 vcc, 0, v0 +; GFX9-NEXT: s_and_saveexec_b64 s[4:5], vcc +; GFX9-NEXT: s_cbranch_execz .LBB5_2 +; GFX9-NEXT: ; %bb.1: ; %if.then +; GFX9-NEXT: s_mov_b32 s11, s18 +; GFX9-NEXT: s_mov_b32 s10, s17 +; GFX9-NEXT: s_mov_b32 s9, s16 +; GFX9-NEXT: s_mov_b32 s8, s7 +; GFX9-NEXT: v_mov_b32_e32 v0, s6 +; GFX9-NEXT: v_mov_b32_e32 v1, s19 +; GFX9-NEXT: buffer_store_dword v0, v1, s[8:11], 0 offen +; GFX9-NEXT: .LBB5_2: ; %if.end +; GFX9-NEXT: s_or_b64 exec, exec, s[4:5] +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX1010-LABEL: divergent_br_profitable: +; GFX1010: ; %bb.0: ; %entry +; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1010-NEXT: v_cmp_lt_i32_e32 vcc_lo, 0, v0 +; GFX1010-NEXT: s_and_saveexec_b32 s4, vcc_lo +; GFX1010-NEXT: s_cbranch_execz .LBB5_2 +; GFX1010-NEXT: ; %bb.1: ; %if.then +; GFX1010-NEXT: v_mov_b32_e32 v0, s6 +; GFX1010-NEXT: v_mov_b32_e32 v1, s19 +; GFX1010-NEXT: s_mov_b32 s11, s18 +; GFX1010-NEXT: s_mov_b32 s10, s17 +; GFX1010-NEXT: s_mov_b32 s9, s16 +; GFX1010-NEXT: s_mov_b32 s8, s7 +; GFX1010-NEXT: buffer_store_dword v0, v1, s[8:11], 0 offen +; GFX1010-NEXT: .LBB5_2: ; %if.end +; GFX1010-NEXT: s_waitcnt_depctr 0xffe3 +; GFX1010-NEXT: s_or_b32 exec_lo, exec_lo, s4 +; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1010-NEXT: s_setpc_b64 s[30:31] +; +; GFX1030-LABEL: divergent_br_profitable: +; GFX1030: ; %bb.0: ; %entry +; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1030-NEXT: s_mov_b32 s4, exec_lo +; GFX1030-NEXT: v_cmpx_lt_i32_e32 0, v0 +; GFX1030-NEXT: s_cbranch_execz .LBB5_2 +; 
GFX1030-NEXT: ; %bb.1: ; %if.then +; GFX1030-NEXT: v_mov_b32_e32 v0, s6 +; GFX1030-NEXT: v_mov_b32_e32 v1, s19 +; GFX1030-NEXT: s_mov_b32 s11, s18 +; GFX1030-NEXT: s_mov_b32 s10, s17 +; GFX1030-NEXT: s_mov_b32 s9, s16 +; GFX1030-NEXT: s_mov_b32 s8, s7 +; GFX1030-NEXT: buffer_store_dword v0, v1, s[8:11], 0 offen +; GFX1030-NEXT: .LBB5_2: ; %if.end +; GFX1030-NEXT: s_or_b32 exec_lo, exec_lo, s4 +; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1030-NEXT: s_setpc_b64 s[30:31] +entry: + %cmp = icmp sgt i32 %flag, 0 + br i1 %cmp, label %if.then, label %if.end, !prof !1 + +if.then: + tail call void @llvm.amdgcn.raw.ptr.buffer.store.i32(i32 %value, ptr addrspace(8) %res, i32 %v_offset, i32 0, i32 0) + br label %if.end + +if.end: + call void @llvm.amdgcn.s.waitcnt(i32 0) + ret void +} + +declare void @llvm.amdgcn.raw.ptr.buffer.store.i32(i32, ptr addrspace(8) nocapture writeonly, i32, i32, i32 immarg) +declare void @llvm.amdgcn.s.waitcnt(i32) +declare i32 @llvm.amdgcn.workitem.id.x() + +!0 = !{!"branch_weights", i32 1000, i32 1000} +!1 = !{!"branch_weights", i32 2000, i32 1} From 808c498f52c8ff7724f762dab351600864023098 Mon Sep 17 00:00:00 2001 From: lntue Date: Wed, 25 Sep 2024 10:58:08 -0400 Subject: [PATCH 187/212] Revert "[libc][math] Implement issignaling macro." (#109992) Reverts llvm/llvm-project#109615 --- .../llvm-libc-macros/math-function-macros.h | 4 -- libc/test/include/CMakeLists.txt | 45 ----------------- libc/test/include/IsSignalingTest.h | 49 ------------------- libc/test/include/issignaling_test.c | 24 --------- libc/test/include/issignaling_test.cpp | 18 ------- libc/test/include/issignalingf_test.cpp | 18 ------- libc/test/include/issignalingl_test.cpp | 18 ------- 7 files changed, 176 deletions(-) delete mode 100644 libc/test/include/IsSignalingTest.h delete mode 100644 libc/test/include/issignaling_test.c delete mode 100644 libc/test/include/issignaling_test.cpp delete mode 100644 libc/test/include/issignalingf_test.cpp delete mode 100644 libc/test/include/issignalingl_test.cpp diff --git a/libc/include/llvm-libc-macros/math-function-macros.h b/libc/include/llvm-libc-macros/math-function-macros.h index c740eb2d18825..68f9ff9d1c033 100644 --- a/libc/include/llvm-libc-macros/math-function-macros.h +++ b/libc/include/llvm-libc-macros/math-function-macros.h @@ -20,9 +20,5 @@ __builtin_fpclassify(FP_NAN, FP_INFINITE, FP_NORMAL, FP_SUBNORMAL, FP_ZERO, x) #define isnormal(x) __builtin_isnormal(x) #define issubnormal(x) (fpclassify(x) == FP_SUBNORMAL) -#if (defined(__clang__) && __clang_major__ >= 18) || \ - (defined(__GNUC__) && __GNUC__ >= 13) -#define issignaling(x) __builtin_issignaling(x) -#endif #endif // LLVM_LIBC_MACROS_MATH_FUNCTION_MACROS_H diff --git a/libc/test/include/CMakeLists.txt b/libc/test/include/CMakeLists.txt index dd8f21bdd07ae..12692eed417c4 100644 --- a/libc/test/include/CMakeLists.txt +++ b/libc/test/include/CMakeLists.txt @@ -81,36 +81,6 @@ add_libc_test( libc.include.llvm-libc-macros.stdckdint_macros ) -add_libc_test( - issignaling_test - SUITE - libc_include_tests - SRCS - issignaling_test.cpp - DEPENDS - libc.include.llvm-libc-macros.math_function_macros -) - -add_libc_test( - issignalingf_test - SUITE - libc_include_tests - SRCS - issignalingf_test.cpp - DEPENDS - libc.include.llvm-libc-macros.math_function_macros -) - -add_libc_test( - issignalingl_test - SUITE - libc_include_tests - SRCS - issignalingl_test.cpp - DEPENDS - libc.include.llvm-libc-macros.math_function_macros -) - add_libc_test( issubnormal_test SUITE @@ -396,21 +366,6 @@ 
add_libc_test(
   libc.include.llvm-libc-macros.math_function_macros
 )
 
-add_libc_test(
-  issignaling_c_test
-  C_TEST
-  UNIT_TEST_ONLY
-  SUITE
-    libc_include_tests
-  SRCS
-    issignaling_test.c
-  COMPILE_OPTIONS
-    -Wall
-    -Werror
-  DEPENDS
-    libc.include.llvm-libc-macros.math_function_macros
-)
-
 add_libc_test(
   isinf_c_test
   C_TEST
diff --git a/libc/test/include/IsSignalingTest.h b/libc/test/include/IsSignalingTest.h
deleted file mode 100644
index c369cfe090ed3..0000000000000
--- a/libc/test/include/IsSignalingTest.h
+++ /dev/null
@@ -1,49 +0,0 @@
-//===-- Utility class to test the issignaling macro ------------*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIBC_TEST_INCLUDE_MATH_ISSIGNALING_H
-#define LLVM_LIBC_TEST_INCLUDE_MATH_ISSIGNALING_H
-
-#include "test/UnitTest/FPMatcher.h"
-#include "test/UnitTest/Test.h"
-
-#include "include/llvm-libc-macros/math-function-macros.h"
-
-template <typename T>
-class IsSignalingTest : public LIBC_NAMESPACE::testing::Test {
-  DECLARE_SPECIAL_CONSTANTS(T)
-
-public:
-  typedef int (*IsSignalingFunc)(T);
-
-  void testSpecialNumbers(IsSignalingFunc func) {
-    EXPECT_EQ(func(aNaN), 0);
-    EXPECT_EQ(func(neg_aNaN), 0);
-    EXPECT_EQ(func(sNaN), 1);
-    EXPECT_EQ(func(neg_sNaN), 1);
-    EXPECT_EQ(func(inf), 0);
-    EXPECT_EQ(func(neg_inf), 0);
-    EXPECT_EQ(func(min_normal), 0);
-    EXPECT_EQ(func(max_normal), 0);
-    EXPECT_EQ(func(neg_max_normal), 0);
-    EXPECT_EQ(func(min_denormal), 0);
-    EXPECT_EQ(func(neg_min_denormal), 0);
-    EXPECT_EQ(func(max_denormal), 0);
-    EXPECT_EQ(func(zero), 0);
-    EXPECT_EQ(func(neg_zero), 0);
-  }
-};
-
-#define LIST_ISSIGNALING_TESTS(T, func)                                        \
-  using LlvmLibcIsSignalingTest = IsSignalingTest<T>;                          \
-  TEST_F(LlvmLibcIsSignalingTest, SpecialNumbers) {                            \
-    auto issignaling_func = [](T x) { return func(x); };                       \
-    testSpecialNumbers(issignaling_func);                                      \
-  }
-
-#endif // LLVM_LIBC_TEST_INCLUDE_MATH_ISSIGNALING_H
diff --git a/libc/test/include/issignaling_test.c b/libc/test/include/issignaling_test.c
deleted file mode 100644
index 2c080696404ae..0000000000000
--- a/libc/test/include/issignaling_test.c
+++ /dev/null
@@ -1,24 +0,0 @@
-//===-- Unittests for issignaling macro -----------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-#include "include/llvm-libc-macros/math-function-macros.h"
-
-#include <assert.h>
-
-// TODO: enable the test unconditionally when issignaling macro is fixed for
-// older compiler
-int main(void) {
-#ifdef issignaling
-  assert(issignaling(__builtin_nans("")) == 1);
-  assert(issignaling(__builtin_nansf("")) == 1);
-  assert(issignaling(__builtin_nansl("")) == 1);
-  assert(issignaling(1.819f) == 0);
-  assert(issignaling(-1.726) == 0);
-  assert(issignaling(1.426L) == 0);
-#endif
-  return 0;
-}
diff --git a/libc/test/include/issignaling_test.cpp b/libc/test/include/issignaling_test.cpp
deleted file mode 100644
index ef007feb0a633..0000000000000
--- a/libc/test/include/issignaling_test.cpp
+++ /dev/null
@@ -1,18 +0,0 @@
-//===-- Unittest for issignaling[d] macro ---------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include "IsSignalingTest.h"
-#include "include/llvm-libc-macros/math-function-macros.h"
-
-// TODO: enable the test unconditionally when issignaling macro is fixed for
-// older compiler
-#ifdef issignaling
-LIST_ISSIGNALING_TESTS(double, issignaling)
-#else
-int main() { return 0; }
-#endif
diff --git a/libc/test/include/issignalingf_test.cpp b/libc/test/include/issignalingf_test.cpp
deleted file mode 100644
index 9b236f2bb84d7..0000000000000
--- a/libc/test/include/issignalingf_test.cpp
+++ /dev/null
@@ -1,18 +0,0 @@
-//===-- Unittest for issignaling[f] macro ---------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include "IsSignalingTest.h"
-#include "include/llvm-libc-macros/math-function-macros.h"
-
-// TODO: enable the test unconditionally when issignaling macro is fixed for
-// older compiler
-#ifdef issignaling
-LIST_ISSIGNALING_TESTS(float, issignaling)
-#else
-int main() { return 0; }
-#endif
diff --git a/libc/test/include/issignalingl_test.cpp b/libc/test/include/issignalingl_test.cpp
deleted file mode 100644
index 35482cb4b0202..0000000000000
--- a/libc/test/include/issignalingl_test.cpp
+++ /dev/null
@@ -1,18 +0,0 @@
-//===-- Unittest for issignaling[l] macro ---------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include "IsSignalingTest.h"
-#include "include/llvm-libc-macros/math-function-macros.h"
-
-// TODO: enable the test unconditionally when issignaling macro is fixed for
-// older compiler
-#ifdef issignaling
-LIST_ISSIGNALING_TESTS(long double, issignaling)
-#else
-int main() { return 0; }
-#endif
From 556ec4a7261447d13703816cd3730a891441e52c Mon Sep 17 00:00:00 2001
From: Philip Reames
Date: Wed, 25 Sep 2024 08:17:55 -0700
Subject: [PATCH 188/212] [SLP] Pass operand info to getCmpSelInstrCost
 (#109998)

Depending on the constant, selects with constant arms can have highly
varying cost. This adjusts SLP to use the new API introduced in d2885743.

Fixes https://github.com/llvm/llvm-project/issues/109466.
---
 .../lib/Transforms/Vectorize/SLPVectorizer.cpp |  8 ++++----
 .../RISCV/select-profitability.ll              | 18 +++++++++++-------
 2 files changed, 15 insertions(+), 11 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 154fed4a8ad2e..7c3741db40e75 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -10669,8 +10669,8 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef VectorizedVals,
 
       InstructionCost ScalarCost = TTI->getCmpSelInstrCost(
           E->getOpcode(), OrigScalarTy, Builder.getInt1Ty(), CurrentPred,
-          CostKind, {TTI::OK_AnyValue, TTI::OP_None},
-          {TTI::OK_AnyValue, TTI::OP_None}, VI);
+          CostKind, getOperandInfo(VI->getOperand(0)),
+          getOperandInfo(VI->getOperand(1)), VI);
       InstructionCost IntrinsicCost = GetMinMaxCost(OrigScalarTy, VI);
       if (IntrinsicCost.isValid())
         ScalarCost = IntrinsicCost;
@@ -10682,8 +10682,8 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef VectorizedVals,
 
       InstructionCost VecCost =
          TTI->getCmpSelInstrCost(E->getOpcode(), VecTy, MaskTy, VecPred,
-                                  CostKind, {TTI::OK_AnyValue, TTI::OP_None},
-                                  {TTI::OK_AnyValue, TTI::OP_None}, VL0);
+                                  CostKind, getOperandInfo(E->getOperand(0)),
+                                  getOperandInfo(E->getOperand(1)), VL0);
      if (auto *SI = dyn_cast<SelectInst>(VL0)) {
        auto *CondType =
            getWidenedType(SI->getCondition()->getType(), VL.size());
diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/select-profitability.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/select-profitability.ll
index 4496b19fa200c..9cfc5f86cb014 100644
--- a/llvm/test/Transforms/SLPVectorizer/RISCV/select-profitability.ll
+++ b/llvm/test/Transforms/SLPVectorizer/RISCV/select-profitability.ll
@@ -31,13 +31,17 @@ define i32 @pow2_zero_constant_shift(i16 zeroext %a, i16 zeroext %b, i16 zeroext
 define i32 @pow2_zero_variable_shift(i16 zeroext %a, i16 zeroext %b, i16 zeroext %c, i16 zeroext %d) {
 ; CHECK-LABEL: define i32 @pow2_zero_variable_shift(
 ; CHECK-SAME: i16 zeroext [[A:%.*]], i16 zeroext [[B:%.*]], i16 zeroext [[C:%.*]], i16 zeroext [[D:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x i16> poison, i16 [[A]], i32 0
-; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x i16> [[TMP1]], i16 [[B]], i32 1
-; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <4 x i16> [[TMP2]], i16 [[C]], i32 2
-; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <4 x i16> [[TMP3]], i16 [[D]], i32 3
-; CHECK-NEXT:    [[TMP5:%.*]] = icmp eq <4 x i16> [[TMP4]], <i16 1, i16 1, i16 1, i16 1>
-; CHECK-NEXT:    [[TMP6:%.*]] = select <4 x i1> [[TMP5]], <4 x i32> <i32 524288, i32 262144, i32 131072, i32 65536>, <4 x i32> zeroinitializer
-; CHECK-NEXT:    [[OR_RDX2:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP6]])
+; CHECK-NEXT:    [[T39_I0:%.*]] = icmp eq i16 [[A]], 1
+; CHECK-NEXT:    [[T39_I1:%.*]] = icmp eq i16 [[B]], 1
+; CHECK-NEXT:    [[T39_I2:%.*]] = icmp eq i16 [[C]], 1
+; CHECK-NEXT:    [[T39_I3:%.*]] = icmp eq i16 [[D]], 1
+; CHECK-NEXT:    [[T40_I0:%.*]] = select i1 [[T39_I0]], i32 524288, i32 0
+; CHECK-NEXT:    [[T40_I1:%.*]] = select i1 [[T39_I1]], i32 262144, i32 0
+; CHECK-NEXT:    [[T40_I2:%.*]] = select i1 [[T39_I2]], i32 131072, i32 0
+; CHECK-NEXT:    [[T40_I3:%.*]] = select i1 [[T39_I3]], i32 65536, i32 0
+; CHECK-NEXT:    [[OR_RDX0:%.*]] = or i32 [[T40_I0]], [[T40_I1]]
+; CHECK-NEXT:    [[OR_RDX1:%.*]] = or i32 [[T40_I2]], [[T40_I3]]
+; CHECK-NEXT:    [[OR_RDX2:%.*]] = or i32 [[OR_RDX0]], [[OR_RDX1]]
 ; CHECK-NEXT:    ret i32 [[OR_RDX2]]
 ;
   %t39.i0 = icmp eq i16 %a, 1
From aae7ac668588192e21a2435da0229fa0f49c231f Mon Sep 17 00:00:00 2001
From: Florian Hahn
Date: Wed, 25 Sep 2024 16:44:41 +0100
Subject: [PATCH 189/212] [VPlan] Remove VPIteration, update to use VPLane
 directly (NFC)

After 8ec406757cb92 (https://github.com/llvm/llvm-project/pull/95842),
only the lane part of VPIteration is used.

Simplify the code by replacing remaining uses of VPIteration with VPLane
directly.
---
 .../Transforms/Vectorize/LoopVectorize.cpp    |  44 ++++----
 llvm/lib/Transforms/Vectorize/VPlan.cpp       |  53 +++++----
 llvm/lib/Transforms/Vectorize/VPlan.h         |  46 +++-----
 .../lib/Transforms/Vectorize/VPlanRecipes.cpp | 104 +++++++++---------
 4 files changed, 113 insertions(+), 134 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index cac0b57fc6964..db4631e19c11d 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -505,8 +505,7 @@ class InnerLoopVectorizer {
   /// inclusive. Uses the VPValue operands from \p RepRecipe instead of \p
   /// Instr's operands.
   void scalarizeInstruction(const Instruction *Instr,
-                            VPReplicateRecipe *RepRecipe,
-                            const VPIteration &Instance,
+                            VPReplicateRecipe *RepRecipe, const VPLane &Lane,
                             VPTransformState &State);
 
   /// Fix the non-induction PHIs in \p Plan.
@@ -2322,14 +2321,14 @@ static bool useMaskedInterleavedAccesses(const TargetTransformInfo &TTI) {
 
 void InnerLoopVectorizer::scalarizeInstruction(const Instruction *Instr,
                                                VPReplicateRecipe *RepRecipe,
-                                               const VPIteration &Instance,
+                                               const VPLane &Lane,
                                                VPTransformState &State) {
   assert(!Instr->getType()->isAggregateType() && "Can't handle vectors");
 
   // llvm.experimental.noalias.scope.decl intrinsics must only be duplicated for
   // the first lane and part.
   if (isa<NoAliasScopeDeclInst>(Instr))
-    if (!Instance.isFirstIteration())
+    if (!Lane.isFirstLane())
      return;
 
   // Does this instruction return a value ?
@@ -2354,18 +2353,18 @@ void InnerLoopVectorizer::scalarizeInstruction(const Instruction *Instr,
   // Replace the operands of the cloned instructions with their scalar
   // equivalents in the new loop.
   for (const auto &I : enumerate(RepRecipe->operands())) {
-    auto InputInstance = Instance;
+    auto InputLane = Lane;
     VPValue *Operand = I.value();
     if (vputils::isUniformAfterVectorization(Operand))
-      InputInstance.Lane = VPLane::getFirstLane();
-    Cloned->setOperand(I.index(), State.get(Operand, InputInstance));
+      InputLane = VPLane::getFirstLane();
+    Cloned->setOperand(I.index(), State.get(Operand, InputLane));
   }
   State.addNewMetadata(Cloned, Instr);
 
   // Place the cloned scalar in the new loop.
   State.Builder.Insert(Cloned);
 
-  State.set(RepRecipe, Cloned, Instance);
+  State.set(RepRecipe, Cloned, Lane);
 
   // If we just cloned a new assumption, add it the assumption cache.
  if (auto *II = dyn_cast<AssumeInst>(Cloned))
@@ -2784,7 +2783,7 @@ void InnerLoopVectorizer::fixupIVUsers(PHINode *OrigPhi,
       VPValue *StepVPV = Plan.getSCEVExpansion(II.getStep());
       assert(StepVPV && "step must have been expanded during VPlan execution");
       Value *Step = StepVPV->isLiveIn() ? StepVPV->getLiveInIRValue()
-                                        : State.get(StepVPV, {0, 0});
+                                        : State.get(StepVPV, VPLane(0));
       Value *Escape =
           emitTransformedIndex(B, CountMinusOne, II.getStartValue(), Step,
                                II.getKind(), II.getInductionBinOp());
@@ -7435,8 +7434,7 @@ static void createAndCollectMergePhiForReduction(
   auto *PhiR = cast<VPReductionPHIRecipe>(RedResult->getOperand(0));
   const RecurrenceDescriptor &RdxDesc = PhiR->getRecurrenceDescriptor();
 
-  Value *FinalValue =
-      State.get(RedResult, VPIteration(0, VPLane::getFirstLane()));
+  Value *FinalValue = State.get(RedResult, VPLane(VPLane::getFirstLane()));
   auto *ResumePhi =
       dyn_cast<PHINode>(PhiR->getStartValue()->getUnderlyingValue());
   if (VectorizingEpilogue && RecurrenceDescriptor::isAnyOfRecurrenceKind(
@@ -7525,7 +7523,7 @@ LoopVectorizationPlanner::executePlan(
     BestVPlan.getPreheader()->execute(&State);
   }
   if (!ILV.getTripCount())
-    ILV.setTripCount(State.get(BestVPlan.getTripCount(), {0, 0}));
+    ILV.setTripCount(State.get(BestVPlan.getTripCount(), VPLane(0)));
   else
     assert(IsEpilogueVectorization && "should only re-use the existing trip "
                                      "count during epilogue vectorization");
@@ -9409,48 +9407,48 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
 }
 
 void VPDerivedIVRecipe::execute(VPTransformState &State) {
-  assert(!State.Instance && "VPDerivedIVRecipe being replicated.");
+  assert(!State.Lane && "VPDerivedIVRecipe being replicated.");
 
   // Fast-math-flags propagate from the original induction instruction.
   IRBuilder<>::FastMathFlagGuard FMFG(State.Builder);
   if (FPBinOp)
     State.Builder.setFastMathFlags(FPBinOp->getFastMathFlags());
 
-  Value *Step = State.get(getStepValue(), VPIteration(0, 0));
-  Value *CanonicalIV = State.get(getOperand(1), VPIteration(0, 0));
+  Value *Step = State.get(getStepValue(), VPLane(0));
+  Value *CanonicalIV = State.get(getOperand(1), VPLane(0));
   Value *DerivedIV = emitTransformedIndex(
       State.Builder, CanonicalIV, getStartValue()->getLiveInIRValue(), Step,
       Kind, cast_if_present<BinaryOperator>(FPBinOp));
   DerivedIV->setName("offset.idx");
   assert(DerivedIV != CanonicalIV && "IV didn't need transforming?");
 
-  State.set(this, DerivedIV, VPIteration(0, 0));
+  State.set(this, DerivedIV, VPLane(0));
 }
 
 void VPReplicateRecipe::execute(VPTransformState &State) {
   Instruction *UI = getUnderlyingInstr();
-  if (State.Instance) { // Generate a single instance.
+  if (State.Lane) { // Generate a single instance.
     assert((State.VF.isScalar() || !isUniform()) &&
           "uniform recipe shouldn't be predicated");
     assert(!State.VF.isScalable() && "Can't scalarize a scalable vector");
-    State.ILV->scalarizeInstruction(UI, this, *State.Instance, State);
+    State.ILV->scalarizeInstruction(UI, this, *State.Lane, State);
     // Insert scalar instance packing it into a vector.
     if (State.VF.isVector() && shouldPack()) {
      // If we're constructing lane 0, initialize to start from poison.
-      if (State.Instance->Lane.isFirstLane()) {
+      if (State.Lane->isFirstLane()) {
        assert(!State.VF.isScalable() && "VF is assumed to be non scalable.");
        Value *Poison = PoisonValue::get(
            VectorType::get(UI->getType(), State.VF));
        State.set(this, Poison);
      }
-      State.packScalarIntoVectorValue(this, *State.Instance);
+      State.packScalarIntoVectorValue(this, *State.Lane);
    }
    return;
  }
 
  if (IsUniform) {
    // Uniform within VL means we need to generate lane 0.
-    State.ILV->scalarizeInstruction(UI, this, VPIteration(0, 0), State);
+    State.ILV->scalarizeInstruction(UI, this, VPLane(0), State);
    return;
  }
@@ -9459,7 +9457,7 @@ void VPReplicateRecipe::execute(VPTransformState &State) {
  if (isa<StoreInst>(UI) &&
      vputils::isUniformAfterVectorization(getOperand(1))) {
    auto Lane = VPLane::getLastLaneForVF(State.VF);
-    State.ILV->scalarizeInstruction(UI, this, VPIteration(0, Lane), State);
+    State.ILV->scalarizeInstruction(UI, this, VPLane(Lane), State);
    return;
  }
 
  // Generate scalar instances for all VF lanes.
  assert(!State.VF.isScalable() && "Can't scalarize a scalable vector");
  const unsigned EndLane = State.VF.getKnownMinValue();
  for (unsigned Lane = 0; Lane < EndLane; ++Lane)
-    State.ILV->scalarizeInstruction(UI, this, VPIteration(0, Lane), State);
+    State.ILV->scalarizeInstruction(UI, this, VPLane(Lane), State);
 }
 
 // Determine how to lower the scalar epilogue, which depends on 1) optimising
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp
index 5e4d487261c6f..6ddbfcf0ecfe5 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -228,28 +228,27 @@ VPTransformState::VPTransformState(ElementCount VF, unsigned UF, LoopInfo *LI,
     : VF(VF), CFG(DT), LI(LI), Builder(Builder), ILV(ILV), Plan(Plan),
       LVer(nullptr), TypeAnalysis(Plan->getCanonicalIV()->getScalarType()) {}
 
-Value *VPTransformState::get(VPValue *Def, const VPIteration &Instance) {
+Value *VPTransformState::get(VPValue *Def, const VPLane &Lane) {
   if (Def->isLiveIn())
     return Def->getLiveInIRValue();
 
-  if (hasScalarValue(Def, Instance)) {
-    return Data.VPV2Scalars[Def][Instance.Lane.mapToCacheIndex(VF)];
-  }
-  if (!Instance.Lane.isFirstLane() &&
-      vputils::isUniformAfterVectorization(Def) &&
-      hasScalarValue(Def, {Instance.Part, VPLane::getFirstLane()})) {
+  if (hasScalarValue(Def, Lane))
+    return Data.VPV2Scalars[Def][Lane.mapToCacheIndex(VF)];
+
+  if (!Lane.isFirstLane() && vputils::isUniformAfterVectorization(Def) &&
+      hasScalarValue(Def, VPLane::getFirstLane())) {
     return Data.VPV2Scalars[Def][0];
   }
 
   assert(hasVectorValue(Def));
   auto *VecPart = Data.VPV2Vector[Def];
   if (!VecPart->getType()->isVectorTy()) {
     assert(Lane.isFirstLane() && "cannot get lane > 0 for scalar");
     return VecPart;
   }
   // TODO: Cache created scalar values.
-  Value *Lane = Instance.Lane.getAsRuntimeExpr(Builder, VF);
-  auto *Extract = Builder.CreateExtractElement(VecPart, Lane);
+  Value *LaneV = Lane.getAsRuntimeExpr(Builder, VF);
+  auto *Extract = Builder.CreateExtractElement(VecPart, LaneV);
   // set(Def, Extract, Instance);
   return Extract;
 }
@@ -258,11 +257,11 @@ Value *VPTransformState::get(VPValue *Def, bool NeedsScalar) {
   if (NeedsScalar) {
     assert((VF.isScalar() || Def->isLiveIn() || hasVectorValue(Def) ||
            !vputils::onlyFirstLaneUsed(Def) ||
-            (hasScalarValue(Def, VPIteration(0, 0)) &&
+            (hasScalarValue(Def, VPLane(0)) &&
             Data.VPV2Scalars[Def].size() == 1)) &&
           "Trying to access a single scalar per part but has multiple scalars "
           "per part.");
-    return get(Def, VPIteration(0, 0));
+    return get(Def, VPLane(0));
   }
 
   // If Values have been set for this Def return the one relevant for \p Part.
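// Illustration (a minimal editor's sketch, not part of the patch; `State` and
// `Def` stand for the VPTransformState and VPValue from the hunk above): with
// the VPIteration wrapper gone, per-lane queries take a VPLane directly, and a
// non-first lane of a uniform-after-vectorization value resolves to the cached
// lane-0 scalar instead of emitting an extractelement:
//
//   Value *L0 = State.get(Def, VPLane(0)); // cached lane-0 scalar
//   Value *L3 = State.get(Def, VPLane(3)); // same value when Def is uniform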
@@ -289,7 +288,7 @@ Value *VPTransformState::get(VPValue *Def, bool NeedsScalar) {
     return Shuf;
   };
 
-  if (!hasScalarValue(Def, {0, 0})) {
+  if (!hasScalarValue(Def, {0})) {
     assert(Def->isLiveIn() && "expected a live-in");
     Value *IRV = Def->getLiveInIRValue();
     Value *B = GetBroadcastInstrs(IRV);
@@ -297,7 +296,7 @@ Value *VPTransformState::get(VPValue *Def, bool NeedsScalar) {
     return B;
   }
 
-  Value *ScalarValue = get(Def, {0, 0});
+  Value *ScalarValue = get(Def, VPLane(0));
   // If we aren't vectorizing, we can just copy the scalar map values over
   // to the vector map.
   if (VF.isScalar()) {
@@ -307,9 +306,9 @@ Value *VPTransformState::get(VPValue *Def, bool NeedsScalar) {
 
   bool IsUniform = vputils::isUniformAfterVectorization(Def);
 
-  unsigned LastLane = IsUniform ? 0 : VF.getKnownMinValue() - 1;
+  VPLane LastLane(IsUniform ? 0 : VF.getKnownMinValue() - 1);
   // Check if there is a scalar value for the selected lane.
-  if (!hasScalarValue(Def, {0, LastLane})) {
+  if (!hasScalarValue(Def, LastLane)) {
     // At the moment, VPWidenIntOrFpInductionRecipes, VPScalarIVStepsRecipes and
     // VPExpandSCEVRecipes can also be uniform.
     assert((isa<VPWidenIntOrFpInductionRecipe>(Def->getDefiningRecipe()) ||
@@ -320,7 +319,7 @@ Value *VPTransformState::get(VPValue *Def, bool NeedsScalar) {
     LastLane = 0;
   }
 
-  auto *LastInst = cast<Instruction>(get(Def, {0, LastLane}));
+  auto *LastInst = cast<Instruction>(get(Def, LastLane));
   // Set the insert point after the last scalarized instruction or after the
   // last PHI, if LastInst is a PHI. This ensures the insertelement sequence
   // will directly follow the scalar definitions.
@@ -347,7 +346,7 @@ Value *VPTransformState::get(VPValue *Def, bool NeedsScalar) {
     Value *Undef = PoisonValue::get(VectorType::get(LastInst->getType(), VF));
     set(Def, Undef);
     for (unsigned Lane = 0; Lane < VF.getKnownMinValue(); ++Lane)
-      packScalarIntoVectorValue(Def, {0, Lane});
+      packScalarIntoVectorValue(Def, Lane);
     VectorValue = get(Def);
   }
   Builder.restoreIP(OldIP);
@@ -401,11 +400,11 @@ void VPTransformState::setDebugLocFrom(DebugLoc DL) {
 }
 
 void VPTransformState::packScalarIntoVectorValue(VPValue *Def,
-                                                 const VPIteration &Instance) {
-  Value *ScalarInst = get(Def, Instance);
+                                                 const VPLane &Lane) {
+  Value *ScalarInst = get(Def, Lane);
   Value *VectorValue = get(Def);
-  VectorValue = Builder.CreateInsertElement(
-      VectorValue, ScalarInst, Instance.Lane.getAsRuntimeExpr(Builder, VF));
+  VectorValue = Builder.CreateInsertElement(VectorValue, ScalarInst,
+                                            Lane.getAsRuntimeExpr(Builder, VF));
   set(Def, VectorValue);
 }
 
@@ -483,7 +482,7 @@ void VPIRBasicBlock::execute(VPTransformState *State) {
 }
 
 void VPBasicBlock::execute(VPTransformState *State) {
-  bool Replica = State->Instance && !State->Instance->isFirstIteration();
+  bool Replica = bool(State->Lane);
   VPBasicBlock *PrevVPBB = State->CFG.PrevVPBB;
   VPBlockBase *SingleHPred = nullptr;
   BasicBlock *NewBB = State->CFG.PrevBB; // Reuse it if possible.
@@ -765,14 +764,14 @@ void VPRegionBlock::execute(VPTransformState *State) {
     return;
   }
 
-  assert(!State->Instance && "Replicating a Region with non-null instance.");
+  assert(!State->Lane && "Replicating a Region with non-null instance.");
 
   // Enter replicating mode.
-  State->Instance = VPIteration(0, 0);
   assert(!State->VF.isScalable() && "VF is assumed to be non scalable.");
+  State->Lane = VPLane(0);
   for (unsigned Lane = 0, VF = State->VF.getKnownMinValue(); Lane < VF;
       ++Lane) {
-    State->Instance->Lane = VPLane(Lane, VPLane::Kind::First);
+    State->Lane = VPLane(Lane, VPLane::Kind::First);
    // Visit the VPBlocks connected to \p this, starting from it.
    for (VPBlockBase *Block : RPOT) {
      LLVM_DEBUG(dbgs() << "LV: VPBlock in RPO " << Block->getName() << '\n');
@@ -781,7 +780,7 @@ void VPRegionBlock::execute(VPTransformState *State) {
  }
 
  // Exit replicating mode.
-  State->Instance.reset();
+  State->Lane.reset();
 }
 
 InstructionCost VPBasicBlock::cost(ElementCount VF, VPCostContext &Ctx) {
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index c886a39aec76e..bbcfaf9e19cd0 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -172,6 +172,7 @@ class VPLane {
   Kind LaneKind;
 
 public:
+  VPLane(unsigned Lane) : Lane(Lane), LaneKind(VPLane::Kind::First) {}
   VPLane(unsigned Lane, Kind LaneKind) : Lane(Lane), LaneKind(LaneKind) {}
 
   static VPLane getFirstLane() { return VPLane(0, VPLane::Kind::First); }
@@ -230,23 +231,6 @@ class VPLane {
   }
 };
 
-/// VPIteration represents a single point in the iteration space of the output
-/// (vectorized and/or unrolled) IR loop.
-struct VPIteration {
-  /// in [0..UF)
-  unsigned Part;
-
-  VPLane Lane;
-
-  VPIteration(unsigned Part, unsigned Lane,
-              VPLane::Kind Kind = VPLane::Kind::First)
-      : Part(Part), Lane(Lane, Kind) {}
-
-  VPIteration(unsigned Part, const VPLane &Lane) : Part(Part), Lane(Lane) {}
-
-  bool isFirstIteration() const { return Part == 0 && Lane.isFirstLane(); }
-};
-
 /// VPTransformState holds information passed down when "executing" a VPlan,
 /// needed for generating the output IR.
 struct VPTransformState {
@@ -257,10 +241,10 @@ struct VPTransformState {
   /// The chosen Vectorization Factor of the loop being vectorized.
   ElementCount VF;
 
-  /// Hold the indices to generate specific scalar instructions. Null indicates
+  /// Hold the index to generate specific scalar instructions. Null indicates
   /// that all instances are to be generated, using either scalar or vector
   /// instructions.
-  std::optional<VPIteration> Instance;
+  std::optional<VPLane> Lane;
 
   struct DataState {
     // Each value from the original loop, when vectorized, is represented by a
@@ -275,15 +259,15 @@ struct VPTransformState {
   Value *get(VPValue *Def, bool IsScalar = false);
 
   /// Get the generated Value for a given VPValue and given Part and Lane.
-  Value *get(VPValue *Def, const VPIteration &Instance);
+  Value *get(VPValue *Def, const VPLane &Lane);
 
   bool hasVectorValue(VPValue *Def) { return Data.VPV2Vector.contains(Def); }
 
-  bool hasScalarValue(VPValue *Def, VPIteration Instance) {
+  bool hasScalarValue(VPValue *Def, VPLane Lane) {
     auto I = Data.VPV2Scalars.find(Def);
     if (I == Data.VPV2Scalars.end())
       return false;
-    unsigned CacheIdx = Instance.Lane.mapToCacheIndex(VF);
+    unsigned CacheIdx = Lane.mapToCacheIndex(VF);
     return CacheIdx < I->second.size() && I->second[CacheIdx];
   }
 
   /// Set the generated vector Value for a given VPValue, if \p
   /// IsScalar is false. If \p IsScalar is true, set the scalar in lane 0.
   void set(VPValue *Def, Value *V, bool IsScalar = false) {
     if (IsScalar) {
-      set(Def, V, VPIteration(0, 0));
+      set(Def, V, VPLane(0));
       return;
     }
     assert((VF.isScalar() || V->getType()->isVectorTy()) &&
@@ -305,23 +289,23 @@ struct VPTransformState {
     Data.VPV2Vector[Def] = V;
   }
 
-  /// Set the generated scalar \p V for \p Def and the given \p Instance.
-  void set(VPValue *Def, Value *V, const VPIteration &Instance) {
+  /// Set the generated scalar \p V for \p Def and the given \p Lane.
+ void set(VPValue *Def, Value *V, const VPLane &Lane) { auto Iter = Data.VPV2Scalars.insert({Def, {}}); auto &Scalars = Iter.first->second; - unsigned CacheIdx = Instance.Lane.mapToCacheIndex(VF); + unsigned CacheIdx = Lane.mapToCacheIndex(VF); if (Scalars.size() <= CacheIdx) Scalars.resize(CacheIdx + 1); assert(!Scalars[CacheIdx] && "should overwrite existing value"); Scalars[CacheIdx] = V; } - /// Reset an existing scalar value for \p Def and a given \p Instance. - void reset(VPValue *Def, Value *V, const VPIteration &Instance) { + /// Reset an existing scalar value for \p Def and a given \p Lane. + void reset(VPValue *Def, Value *V, const VPLane &Lane) { auto Iter = Data.VPV2Scalars.find(Def); assert(Iter != Data.VPV2Scalars.end() && "need to overwrite existing value"); - unsigned CacheIdx = Instance.Lane.mapToCacheIndex(VF); + unsigned CacheIdx = Lane.mapToCacheIndex(VF); assert(CacheIdx < Iter->second.size() && "need to overwrite existing value"); Iter->second[CacheIdx] = V; @@ -345,7 +329,7 @@ struct VPTransformState { void setDebugLocFrom(DebugLoc DL); /// Construct the vector value of a scalarized value \p V one lane at a time. - void packScalarIntoVectorValue(VPValue *Def, const VPIteration &Instance); + void packScalarIntoVectorValue(VPValue *Def, const VPLane &Lane); /// Hold state information used when constructing the CFG of the output IR, /// traversing the VPBasicBlocks and generating corresponding IR BasicBlocks. @@ -1289,7 +1273,7 @@ class VPInstruction : public VPRecipeWithIRFlags, /// Utility methods serving execute(): generates a scalar single instance of /// the modeled instruction for a given lane. \returns the scalar generated /// value for lane \p Lane. - Value *generatePerLane(VPTransformState &State, const VPIteration &Lane); + Value *generatePerLane(VPTransformState &State, const VPLane &Lane); #if !defined(NDEBUG) /// Return true if the VPInstruction is a floating point math operation, i.e. diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index 5d1a13086e9f9..dacba152611c1 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -209,7 +209,7 @@ void VPLiveOut::fixPhi(VPlan &Plan, VPTransformState &State) { ? MiddleVPBB : ExitingVPBB; BasicBlock *PredBB = State.CFG.VPBB2IRBB[PredVPBB]; - Value *V = State.get(ExitValue, VPIteration(0, 0)); + Value *V = State.get(ExitValue, VPLane(0)); if (Phi->getBasicBlockIndex(PredBB) != -1) Phi->setIncomingValueForBlock(PredBB, V); else @@ -390,7 +390,7 @@ bool VPInstruction::canGenerateScalarForFirstLane() const { } Value *VPInstruction::generatePerLane(VPTransformState &State, - const VPIteration &Lane) { + const VPLane &Lane) { IRBuilderBase &Builder = State.Builder; assert(getOpcode() == VPInstruction::PtrAdd && @@ -432,9 +432,9 @@ Value *VPInstruction::generate(VPTransformState &State) { } case VPInstruction::ActiveLaneMask: { // Get first lane of vector induction variable. - Value *VIVElem0 = State.get(getOperand(0), VPIteration(0, 0)); + Value *VIVElem0 = State.get(getOperand(0), VPLane(0)); // Get the original loop tripcount. - Value *ScalarTC = State.get(getOperand(1), VPIteration(0, 0)); + Value *ScalarTC = State.get(getOperand(1), VPLane(0)); // If this part of the active lane mask is scalar, generate the CMP directly // to avoid unnecessary extracts. 
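// Illustration (an editor's sketch of the mechanical rewrite this commit
// applies; `R`, `V` and `L` are placeholder names): since the unroll part of
// the old VPIteration is always 0 by this point, only the lane index
// survives the migration:
//
//   State.set(R, V, VPIteration(0, L));   // before
//   State.set(R, V, VPLane(L));           // after
//
// and VPIteration(0, 0) correspondingly becomes VPLane(0), relying on the new
// single-argument VPLane constructor added to VPlan.h above.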
@@ -469,7 +469,7 @@ Value *VPInstruction::generate(VPTransformState &State) { } case VPInstruction::CalculateTripCountMinusVF: { unsigned UF = getParent()->getPlan()->getUF(); - Value *ScalarTC = State.get(getOperand(0), {0, 0}); + Value *ScalarTC = State.get(getOperand(0), VPLane(0)); Value *Step = createStepForVF(Builder, ScalarTC->getType(), State.VF, UF); Value *Sub = Builder.CreateSub(ScalarTC, Step); Value *Cmp = Builder.CreateICmp(CmpInst::Predicate::ICMP_UGT, ScalarTC, Step); @@ -479,7 +479,7 @@ Value *VPInstruction::generate(VPTransformState &State) { case VPInstruction::ExplicitVectorLength: { // TODO: Restructure this code with an explicit remainder loop, vsetvli can // be outside of the main loop. - Value *AVL = State.get(getOperand(0), VPIteration(0, 0)); + Value *AVL = State.get(getOperand(0), /*IsScalar*/ true); // Compute EVL assert(AVL->getType()->isIntegerTy() && "Requested vector length should be an integer."); @@ -494,7 +494,7 @@ Value *VPInstruction::generate(VPTransformState &State) { } case VPInstruction::CanonicalIVIncrementForPart: { unsigned Part = getUnrollPart(*this); - auto *IV = State.get(getOperand(0), VPIteration(0, 0)); + auto *IV = State.get(getOperand(0), VPLane(0)); assert(Part != 0 && "Must have a positive part"); // The canonical IV is incremented by the vectorization factor (num of // SIMD elements) times the unroll part. @@ -503,7 +503,7 @@ Value *VPInstruction::generate(VPTransformState &State) { hasNoSignedWrap()); } case VPInstruction::BranchOnCond: { - Value *Cond = State.get(getOperand(0), VPIteration(0, 0)); + Value *Cond = State.get(getOperand(0), VPLane(0)); // Replace the temporary unreachable terminator with a new conditional // branch, hooking it up to backward destination for exiting blocks now and // to forward destination(s) later when they are created. @@ -625,8 +625,7 @@ Value *VPInstruction::generate(VPTransformState &State) { assert(Offset <= State.VF.getKnownMinValue() && "invalid offset to extract from"); // Extract lane VF - Offset from the operand. - Res = State.get(getOperand(0), - VPIteration(0, VPLane::getLaneFromEnd(State.VF, Offset))); + Res = State.get(getOperand(0), VPLane::getLaneFromEnd(State.VF, Offset)); } else { assert(Offset <= 1 && "invalid offset to extract from"); Res = State.get(getOperand(0)); @@ -692,7 +691,7 @@ bool VPInstruction::isFPMathOp() const { #endif void VPInstruction::execute(VPTransformState &State) { - assert(!State.Instance && "VPInstruction executing an Instance"); + assert(!State.Lane && "VPInstruction executing an Lane"); IRBuilderBase::FastMathFlagGuard FMFGuard(State.Builder); assert((hasFastMathFlags() == isFPMathOp() || getOpcode() == Instruction::Select) && @@ -707,9 +706,9 @@ void VPInstruction::execute(VPTransformState &State) { if (GeneratesPerAllLanes) { for (unsigned Lane = 0, NumLanes = State.VF.getKnownMinValue(); Lane != NumLanes; ++Lane) { - Value *GeneratedValue = generatePerLane(State, VPIteration(0, Lane)); + Value *GeneratedValue = generatePerLane(State, VPLane(Lane)); assert(GeneratedValue && "generatePerLane must produce a value"); - State.set(this, GeneratedValue, VPIteration(0, Lane)); + State.set(this, GeneratedValue, VPLane(Lane)); } return; } @@ -857,7 +856,7 @@ void VPIRInstruction::execute(VPTransformState &State) { // Set insertion point in PredBB in case an extract needs to be generated. // TODO: Model extracts explicitly. 
State.Builder.SetInsertPoint(PredBB, PredBB->getFirstNonPHIIt()); - Value *V = State.get(ExitValue, VPIteration(0, Lane)); + Value *V = State.get(ExitValue, VPLane(Lane)); auto *Phi = cast(&I); Phi->addIncoming(V, PredBB); } @@ -905,12 +904,12 @@ void VPWidenCallRecipe::execute(VPTransformState &State) { Value *Arg; if (UseIntrinsic && isVectorIntrinsicWithScalarOpAtArg(VectorIntrinsicID, I.index())) - Arg = State.get(I.value(), VPIteration(0, 0)); + Arg = State.get(I.value(), VPLane(0)); // Some vectorized function variants may also take a scalar argument, // e.g. linear parameters for pointers. This needs to be the scalar value // from the start of the respective part when interleaving. else if (VFTy && !VFTy->getParamType(I.index())->isVectorTy()) - Arg = State.get(I.value(), VPIteration(0, 0)); + Arg = State.get(I.value(), VPLane(0)); else Arg = State.get(I.value()); if (UseIntrinsic && @@ -1045,7 +1044,7 @@ void VPWidenSelectRecipe::execute(VPTransformState &State) { // We have to take the 'vectorized' value and pick the first lane. // Instcombine will make this a no-op. auto *InvarCond = - isInvariantCond() ? State.get(getCond(), VPIteration(0, 0)) : nullptr; + isInvariantCond() ? State.get(getCond(), VPLane(0)) : nullptr; Value *Cond = InvarCond ? InvarCond : State.get(getCond()); Value *Op0 = State.get(getOperand(1)); @@ -1410,7 +1409,7 @@ static Constant *getSignedIntOrFpConstant(Type *Ty, int64_t C) { } void VPWidenIntOrFpInductionRecipe::execute(VPTransformState &State) { - assert(!State.Instance && "Int or FP induction being replicated."); + assert(!State.Lane && "Int or FP induction being replicated."); Value *Start = getStartValue()->getLiveInIRValue(); const InductionDescriptor &ID = getInductionDescriptor(); @@ -1429,7 +1428,7 @@ void VPWidenIntOrFpInductionRecipe::execute(VPTransformState &State) { Builder.setFastMathFlags(ID.getInductionBinOp()->getFastMathFlags()); // Now do the actual transformations, and start with fetching the step value. - Value *Step = State.get(getStepValue(), VPIteration(0, 0)); + Value *Step = State.get(getStepValue(), VPLane(0)); assert((isa(EntryVal) || isa(EntryVal)) && "Expected either an induction phi-node or a truncate of it!"); @@ -1472,7 +1471,7 @@ void VPWidenIntOrFpInductionRecipe::execute(VPTransformState &State) { // Multiply the vectorization factor by the step using integer or // floating-point arithmetic as appropriate. Type *StepType = Step->getType(); - Value *RuntimeVF = State.get(getVFValue(), {0, 0}); + Value *RuntimeVF = State.get(getVFValue(), VPLane(0)); if (Step->getType()->isFloatingPointTy()) RuntimeVF = Builder.CreateUIToFP(RuntimeVF, StepType); else @@ -1569,8 +1568,8 @@ void VPScalarIVStepsRecipe::execute(VPTransformState &State) { /// Compute scalar induction steps. \p ScalarIV is the scalar induction /// variable on which to base the steps, \p Step is the size of the step. - Value *BaseIV = State.get(getOperand(0), VPIteration(0, 0)); - Value *Step = State.get(getStepValue(), VPIteration(0, 0)); + Value *BaseIV = State.get(getOperand(0), VPLane(0)); + Value *Step = State.get(getStepValue(), VPLane(0)); IRBuilderBase &Builder = State.Builder; // Ensure step has the same type as that of scalar IV. @@ -1607,8 +1606,8 @@ void VPScalarIVStepsRecipe::execute(VPTransformState &State) { unsigned StartLane = 0; unsigned EndLane = FirstLaneOnly ? 
 1 : State.VF.getKnownMinValue();
- if (State.Instance) {
- StartLane = State.Instance->Lane.getKnownLane();
+ if (State.Lane) {
+ StartLane = State.Lane->getKnownLane();
 EndLane = StartLane + 1;
 }
 Value *StartIdx0 =
@@ -1640,7 +1639,7 @@ void VPScalarIVStepsRecipe::execute(VPTransformState &State) {
 "scalable");
 auto *Mul = Builder.CreateBinOp(MulOp, StartIdx, Step);
 auto *Add = Builder.CreateBinOp(AddOp, BaseIV, Mul);
- State.set(this, Add, VPIteration(0, Lane));
+ State.set(this, Add, VPLane(Lane));
 }
 }
@@ -1678,7 +1677,7 @@ void VPWidenGEPRecipe::execute(VPTransformState &State) {
 // the lane-zero scalar value.
 SmallVector<Value *> Ops;
 for (unsigned I = 0, E = getNumOperands(); I != E; I++)
- Ops.push_back(State.get(getOperand(I), VPIteration(0, 0)));
+ Ops.push_back(State.get(getOperand(I), VPLane(0)));
 auto *NewGEP =
 State.Builder.CreateGEP(GEP->getSourceElementType(), Ops[0],
@@ -1691,9 +1690,8 @@ void VPWidenGEPRecipe::execute(VPTransformState &State) {
 // produce a vector of pointers unless VF is scalar.
 // The pointer operand of the new GEP. If it's loop-invariant, we
 // won't broadcast it.
- auto *Ptr = isPointerLoopInvariant()
- ? State.get(getOperand(0), VPIteration(0, 0))
- : State.get(getOperand(0));
+ auto *Ptr = isPointerLoopInvariant() ? State.get(getOperand(0), VPLane(0))
+ : State.get(getOperand(0));
 // Collect all the indices for the new GEP. If any index is
 // loop-invariant, we won't broadcast it.
@@ -1701,7 +1699,7 @@ void VPWidenGEPRecipe::execute(VPTransformState &State) {
 for (unsigned I = 1, E = getNumOperands(); I < E; I++) {
 VPValue *Operand = getOperand(I);
 if (isIndexLoopInvariant(I - 1))
- Indices.push_back(State.get(Operand, VPIteration(0, 0)));
+ Indices.push_back(State.get(Operand, VPLane(0)));
 else
 Indices.push_back(State.get(Operand));
 }
@@ -1743,7 +1741,7 @@ void VPVectorPointerRecipe ::execute(VPTransformState &State) {
 Type *IndexTy = State.VF.isScalable() && (IsReverse || CurrentPart > 0)
 ? DL.getIndexType(IndexedTy->getPointerTo())
 : Builder.getInt32Ty();
- Value *Ptr = State.get(getOperand(0), VPIteration(0, 0));
+ Value *Ptr = State.get(getOperand(0), VPLane(0));
 bool InBounds = isInBounds();
 Value *ResultPtr = nullptr;
@@ -1844,7 +1842,7 @@ void VPBlendRecipe::print(raw_ostream &O, const Twine &Indent,
 #endif
 void VPReductionRecipe::execute(VPTransformState &State) {
- assert(!State.Instance && "Reduction being replicated.");
+ assert(!State.Lane && "Reduction being replicated.");
 Value *PrevInChain = State.get(getChainOp(), /*IsScalar*/ true);
 RecurKind Kind = RdxDesc.getRecurrenceKind();
 // Propagate the fast-math flags carried by the underlying instruction.
@@ -1894,7 +1892,7 @@
 }
 void VPReductionEVLRecipe::execute(VPTransformState &State) {
- assert(!State.Instance && "Reduction being replicated.");
+ assert(!State.Lane && "Reduction being replicated.");
 auto &Builder = State.Builder;
 // Propagate the fast-math flags carried by the underlying instruction.
@@ -1905,7 +1903,7 @@ void VPReductionEVLRecipe::execute(VPTransformState &State) {
 RecurKind Kind = RdxDesc.getRecurrenceKind();
 Value *Prev = State.get(getChainOp(), /*IsScalar*/ true);
 Value *VecOp = State.get(getVecOp());
- Value *EVL = State.get(getEVL(), VPIteration(0, 0));
+ Value *EVL = State.get(getEVL(), VPLane(0));
 VectorBuilder VBuilder(Builder);
 VBuilder.setEVL(EVL);
@@ -2027,7 +2025,7 @@ Value *VPScalarCastRecipe ::generate(VPTransformState &State) {
 case Instruction::ZExt:
 case Instruction::Trunc: {
 // Note: SExt/ZExt not used yet.
- Value *Op = State.get(getOperand(0), VPIteration(0, 0));
+ Value *Op = State.get(getOperand(0), VPLane(0));
 return State.Builder.CreateCast(Instruction::CastOps(Opcode), Op, ResultTy);
 }
 default:
@@ -2036,7 +2034,7 @@
 }
 void VPScalarCastRecipe ::execute(VPTransformState &State) {
- State.set(this, generate(State), VPIteration(0, 0));
+ State.set(this, generate(State), VPLane(0));
 }
 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
@@ -2051,9 +2049,9 @@ void VPScalarCastRecipe ::print(raw_ostream &O, const Twine &Indent,
 #endif
 void VPBranchOnMaskRecipe::execute(VPTransformState &State) {
- assert(State.Instance && "Branch on Mask works only on single instance.");
+ assert(State.Lane && "Branch on Mask works only on single instance.");
- unsigned Lane = State.Instance->Lane.getKnownLane();
+ unsigned Lane = State.Lane->getKnownLane();
 Value *ConditionBit = nullptr;
 VPValue *BlockInMask = getMask();
@@ -2076,9 +2074,9 @@
 }
 void VPPredInstPHIRecipe::execute(VPTransformState &State) {
- assert(State.Instance && "Predicated instruction PHI works per instance.");
+ assert(State.Lane && "Predicated instruction PHI works per instance.");
 Instruction *ScalarPredInst =
- cast<Instruction>(State.get(getOperand(0), *State.Instance));
+ cast<Instruction>(State.get(getOperand(0), *State.Lane));
 BasicBlock *PredicatedBB = ScalarPredInst->getParent();
 BasicBlock *PredicatingBB = PredicatedBB->getSinglePredecessor();
 assert(PredicatingBB && "Predicated block has no single predecessor.");
@@ -2110,13 +2108,13 @@ void VPPredInstPHIRecipe::execute(VPTransformState &State) {
 Phi->addIncoming(PoisonValue::get(ScalarPredInst->getType()),
 PredicatingBB);
 Phi->addIncoming(ScalarPredInst, PredicatedBB);
- if (State.hasScalarValue(this, *State.Instance))
- State.reset(this, Phi, *State.Instance);
+ if (State.hasScalarValue(this, *State.Lane))
+ State.reset(this, Phi, *State.Lane);
 else
- State.set(this, Phi, *State.Instance);
+ State.set(this, Phi, *State.Lane);
 // NOTE: Currently we need to update the value of the operand, so the next
 // predicated iteration inserts its generated value in the correct vector.
- State.reset(getOperand(0), Phi, *State.Instance);
+ State.reset(getOperand(0), Phi, *State.Lane);
 }
 }
@@ -2239,7 +2237,7 @@ void VPWidenLoadEVLRecipe::execute(VPTransformState &State) {
 auto &Builder = State.Builder;
 State.setDebugLocFrom(getDebugLoc());
 CallInst *NewLI;
- Value *EVL = State.get(getEVL(), VPIteration(0, 0));
+ Value *EVL = State.get(getEVL(), VPLane(0));
 Value *Addr = State.get(getAddr(), !CreateGather);
 Value *Mask = nullptr;
 if (VPValue *VPMask = getMask()) {
@@ -2337,7 +2335,7 @@ void VPWidenStoreEVLRecipe::execute(VPTransformState &State) {
 CallInst *NewSI = nullptr;
 Value *StoredVal = State.get(StoredValue);
- Value *EVL = State.get(getEVL(), VPIteration(0, 0));
+ Value *EVL = State.get(getEVL(), VPLane(0));
 if (isReverse())
 StoredVal = createReverseEVL(Builder, StoredVal, EVL, "vp.reverse");
 Value *Mask = nullptr;
@@ -2463,7 +2461,7 @@ static Value *interleaveVectors(IRBuilderBase &Builder, ArrayRef<Value *> Vals,
 // <0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 11> ; Interleave R,G,B elements
 // store <12 x i32> %interleaved.vec ; Write 4 tuples of R,G,B
 void VPInterleaveRecipe::execute(VPTransformState &State) {
- assert(!State.Instance && "Interleave group being replicated.");
+ assert(!State.Lane && "Interleave group being replicated.");
 const InterleaveGroup<Instruction> *Group = IG;
 Instruction *Instr = Group->getInsertPos();
@@ -2497,7 +2495,7 @@ void VPInterleaveRecipe::execute(VPTransformState &State) {
 Idx = State.Builder.getInt32(-Index);
 VPValue *Addr = getAddr();
- Value *ResAddr = State.get(Addr, VPIteration(0, 0));
+ Value *ResAddr = State.get(Addr, VPLane(0));
 if (auto *I = dyn_cast<Instruction>(ResAddr))
 State.setDebugLocFrom(I->getDebugLoc());
@@ -2797,7 +2795,7 @@ void VPWidenPointerInductionRecipe::execute(VPTransformState &State) {
 // A pointer induction, performed by using a gep
 BasicBlock::iterator InductionLoc = State.Builder.GetInsertPoint();
- Value *ScalarStepValue = State.get(getOperand(1), VPIteration(0, 0));
+ Value *ScalarStepValue = State.get(getOperand(1), VPLane(0));
 Type *PhiType = IndDesc.getStep()->getType();
 Value *RuntimeVF = getRuntimeVF(State.Builder, PhiType, State.VF);
 // Add induction update using an incorrect block temporarily.
The phi node @@ -2831,7 +2829,7 @@ void VPWidenPointerInductionRecipe::execute(VPTransformState &State) { StartOffset = State.Builder.CreateAdd( StartOffset, State.Builder.CreateStepVector(VecPhiType)); - assert(ScalarStepValue == State.get(getOperand(1), VPIteration(0, 0)) && + assert(ScalarStepValue == State.get(getOperand(1), VPLane(0)) && "scalar step must be the same across all parts"); Value *GEP = State.Builder.CreateGEP( State.Builder.getInt8Ty(), NewPointerPhi, @@ -2861,7 +2859,7 @@ void VPWidenPointerInductionRecipe::print(raw_ostream &O, const Twine &Indent, #endif void VPExpandSCEVRecipe::execute(VPTransformState &State) { - assert(!State.Instance && "cannot be used in per-lane"); + assert(!State.Lane && "cannot be used in per-lane"); const DataLayout &DL = State.CFG.PrevBB->getDataLayout(); SCEVExpander Exp(SE, DL, "induction"); @@ -2870,7 +2868,7 @@ void VPExpandSCEVRecipe::execute(VPTransformState &State) { assert(!State.ExpandedSCEVs.contains(Expr) && "Same SCEV expanded multiple times"); State.ExpandedSCEVs[Expr] = Res; - State.set(this, Res, {0, 0}); + State.set(this, Res, VPLane(0)); } #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) @@ -3079,7 +3077,7 @@ void VPActiveLaneMaskPHIRecipe::print(raw_ostream &O, const Twine &Indent, void VPEVLBasedIVPHIRecipe::execute(VPTransformState &State) { BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this); - Value *Start = State.get(getOperand(0), VPIteration(0, 0)); + Value *Start = State.get(getOperand(0), VPLane(0)); PHINode *Phi = State.Builder.CreatePHI(Start->getType(), 2, "evl.based.iv"); Phi->addIncoming(Start, VectorPH); Phi->setDebugLoc(getDebugLoc()); From fff03b07c6048f2b9c45a9f71e6fb38e09d4856c Mon Sep 17 00:00:00 2001 From: Lukacma Date: Wed, 25 Sep 2024 17:00:40 +0100 Subject: [PATCH 190/212] Fix "[AArch64] Implement intrinsics for SME2 FSCALE" (#109999) This patch fixes failure in acle_sme2_fp8_scale.c test --- .../acle_sme2_fp8_scale.c | 564 ++++++++---------- 1 file changed, 264 insertions(+), 300 deletions(-) diff --git a/clang/test/CodeGen/aarch64-fp8-intrinsics/acle_sme2_fp8_scale.c b/clang/test/CodeGen/aarch64-fp8-intrinsics/acle_sme2_fp8_scale.c index b733e772ba307..6bcf9bc946b20 100644 --- a/clang/test/CodeGen/aarch64-fp8-intrinsics/acle_sme2_fp8_scale.c +++ b/clang/test/CodeGen/aarch64-fp8-intrinsics/acle_sme2_fp8_scale.c @@ -18,25 +18,21 @@ // Single x2 // CHECK-LABEL: @test_svscale_single_f16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[OP1:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[OP1]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sme.fp8.scale.single.x2.nxv8f16( [[TMP0]], [[TMP1]], [[OP2:%.*]]) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP3]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: ret [[TMP6]] +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP1_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP1_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP1]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call { , } @llvm.aarch64.sme.fp8.scale.single.x2.nxv8f16( [[TMP2]], [[TMP3]], [[OP2:%.*]]) +// 
CHECK-NEXT: ret { , } [[TMP4]] // // CPP-CHECK-LABEL: @_Z26test_svscale_single_f16_x213svfloat16x2_tu11__SVInt16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[OP1:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[OP1]], i64 8) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sme.fp8.scale.single.x2.nxv8f16( [[TMP0]], [[TMP1]], [[OP2:%.*]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP3]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: ret [[TMP6]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP1_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP1_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , } @llvm.aarch64.sme.fp8.scale.single.x2.nxv8f16( [[TMP2]], [[TMP3]], [[OP2:%.*]]) +// CPP-CHECK-NEXT: ret { , } [[TMP4]] // svfloat16x2_t test_svscale_single_f16_x2(svfloat16x2_t op1, svint16_t op2) __arm_streaming { @@ -45,25 +41,21 @@ svfloat16x2_t test_svscale_single_f16_x2(svfloat16x2_t op1, svint16_t op2) __arm // CHECK-LABEL: @test_svscale_single_f32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[OP1:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[OP1]], i64 4) -// CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sme.fp8.scale.single.x2.nxv4f32( [[TMP0]], [[TMP1]], [[OP2:%.*]]) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP3]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: ret [[TMP6]] +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP1_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP1_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP1]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call { , } @llvm.aarch64.sme.fp8.scale.single.x2.nxv4f32( [[TMP2]], [[TMP3]], [[OP2:%.*]]) +// CHECK-NEXT: ret { , } [[TMP4]] // // CPP-CHECK-LABEL: @_Z26test_svscale_single_f32_x213svfloat32x2_tu11__SVInt32_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[OP1:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[OP1]], i64 4) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sme.fp8.scale.single.x2.nxv4f32( [[TMP0]], [[TMP1]], [[OP2:%.*]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP3]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: ret [[TMP6]] +// 
CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP1_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP1_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , } @llvm.aarch64.sme.fp8.scale.single.x2.nxv4f32( [[TMP2]], [[TMP3]], [[OP2:%.*]]) +// CPP-CHECK-NEXT: ret { , } [[TMP4]] // svfloat32x2_t test_svscale_single_f32_x2(svfloat32x2_t op1, svint32_t op2) __arm_streaming { @@ -72,25 +64,21 @@ svfloat32x2_t test_svscale_single_f32_x2(svfloat32x2_t op1, svint32_t op2) __arm // CHECK-LABEL: @test_svscale_single_f64_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[OP1:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[OP1]], i64 2) -// CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sme.fp8.scale.single.x2.nxv2f64( [[TMP0]], [[TMP1]], [[OP2:%.*]]) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP3]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP4]], [[TMP5]], i64 2) -// CHECK-NEXT: ret [[TMP6]] +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP1_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP1_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP1]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call { , } @llvm.aarch64.sme.fp8.scale.single.x2.nxv2f64( [[TMP2]], [[TMP3]], [[OP2:%.*]]) +// CHECK-NEXT: ret { , } [[TMP4]] // // CPP-CHECK-LABEL: @_Z26test_svscale_single_f64_x213svfloat64x2_tu11__SVInt64_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[OP1:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[OP1]], i64 2) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sme.fp8.scale.single.x2.nxv2f64( [[TMP0]], [[TMP1]], [[OP2:%.*]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP3]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP4]], [[TMP5]], i64 2) -// CPP-CHECK-NEXT: ret [[TMP6]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP1_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP1_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , } @llvm.aarch64.sme.fp8.scale.single.x2.nxv2f64( [[TMP2]], [[TMP3]], [[OP2:%.*]]) +// CPP-CHECK-NEXT: ret { , } [[TMP4]] // svfloat64x2_t test_svscale_single_f64_x2(svfloat64x2_t op1, svint64_t op2) __arm_streaming { @@ -100,37 +88,29 @@ svfloat64x2_t test_svscale_single_f64_x2(svfloat64x2_t op1, svint64_t op2) __arm // Single x4 // CHECK-LABEL: @test_svscale_single_f16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[OP1:%.*]], i64 0) -// CHECK-NEXT: 
[[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[OP1]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[OP1]], i64 16) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[OP1]], i64 24) -// CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sme.fp8.scale.single.x4.nxv8f16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[OP2:%.*]]) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP5]], i64 0) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 8) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP8]], [[TMP9]], i64 16) -// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP10]], [[TMP11]], i64 24) -// CHECK-NEXT: ret [[TMP12]] +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP1_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP1_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP1_COERCE2:%.*]], 2 +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP1_COERCE3:%.*]], 3 +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call { , , , } @llvm.aarch64.sme.fp8.scale.single.x4.nxv8f16( [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]], [[OP2:%.*]]) +// CHECK-NEXT: ret { , , , } [[TMP8]] // // CPP-CHECK-LABEL: @_Z26test_svscale_single_f16_x413svfloat16x4_tu11__SVInt16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[OP1:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[OP1]], i64 8) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[OP1]], i64 16) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[OP1]], i64 24) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sme.fp8.scale.single.x4.nxv8f16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[OP2:%.*]]) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP5]], i64 0) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 8) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP8]], [[TMP9]], i64 16) -// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP10]], [[TMP11]], i64 24) -// CPP-CHECK-NEXT: ret [[TMP12]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP1_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP1_COERCE1:%.*]], 1 
+// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP1_COERCE2:%.*]], 2 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP1_COERCE3:%.*]], 3 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call { , , , } @llvm.aarch64.sme.fp8.scale.single.x4.nxv8f16( [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]], [[OP2:%.*]]) +// CPP-CHECK-NEXT: ret { , , , } [[TMP8]] // svfloat16x4_t test_svscale_single_f16_x4(svfloat16x4_t op1, svint16_t op2) __arm_streaming { @@ -139,37 +119,29 @@ svfloat16x4_t test_svscale_single_f16_x4(svfloat16x4_t op1, svint16_t op2) __arm // CHECK-LABEL: @test_svscale_single_f32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[OP1:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[OP1]], i64 4) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[OP1]], i64 8) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[OP1]], i64 12) -// CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sme.fp8.scale.single.x4.nxv4f32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[OP2:%.*]]) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP5]], i64 0) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 4) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP8]], [[TMP9]], i64 8) -// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP10]], [[TMP11]], i64 12) -// CHECK-NEXT: ret [[TMP12]] +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP1_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP1_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP1_COERCE2:%.*]], 2 +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP1_COERCE3:%.*]], 3 +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call { , , , } @llvm.aarch64.sme.fp8.scale.single.x4.nxv4f32( [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]], [[OP2:%.*]]) +// CHECK-NEXT: ret { , , , } [[TMP8]] // // CPP-CHECK-LABEL: @_Z26test_svscale_single_f32_x413svfloat32x4_tu11__SVInt32_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[OP1:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[OP1]], i64 4) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[OP1]], i64 8) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[OP1]], i64 12) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } 
@llvm.aarch64.sme.fp8.scale.single.x4.nxv4f32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[OP2:%.*]]) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP5]], i64 0) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 4) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP8]], [[TMP9]], i64 8) -// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP10]], [[TMP11]], i64 12) -// CPP-CHECK-NEXT: ret [[TMP12]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP1_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP1_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP1_COERCE2:%.*]], 2 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP1_COERCE3:%.*]], 3 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call { , , , } @llvm.aarch64.sme.fp8.scale.single.x4.nxv4f32( [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]], [[OP2:%.*]]) +// CPP-CHECK-NEXT: ret { , , , } [[TMP8]] // svfloat32x4_t test_svscale_single_f32_x4(svfloat32x4_t op1, svint32_t op2) __arm_streaming { @@ -178,37 +150,29 @@ svfloat32x4_t test_svscale_single_f32_x4(svfloat32x4_t op1, svint32_t op2) __arm // CHECK-LABEL: @test_svscale_single_f64_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[OP1:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[OP1]], i64 2) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[OP1]], i64 4) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[OP1]], i64 6) -// CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sme.fp8.scale.single.x4.nxv2f64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[OP2:%.*]]) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP5]], i64 0) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 2) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP8]], [[TMP9]], i64 4) -// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP10]], [[TMP11]], i64 6) -// CHECK-NEXT: ret [[TMP12]] +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP1_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP1_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP1_COERCE2:%.*]], 2 +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP1_COERCE3:%.*]], 3 
+// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call { , , , } @llvm.aarch64.sme.fp8.scale.single.x4.nxv2f64( [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]], [[OP2:%.*]]) +// CHECK-NEXT: ret { , , , } [[TMP8]] // // CPP-CHECK-LABEL: @_Z26test_svscale_single_f64_x413svfloat64x4_tu11__SVInt64_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[OP1:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[OP1]], i64 2) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[OP1]], i64 4) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[OP1]], i64 6) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sme.fp8.scale.single.x4.nxv2f64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[OP2:%.*]]) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP5]], i64 0) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 2) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP8]], [[TMP9]], i64 4) -// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP10]], [[TMP11]], i64 6) -// CPP-CHECK-NEXT: ret [[TMP12]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP1_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP1_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP1_COERCE2:%.*]], 2 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP1_COERCE3:%.*]], 3 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call { , , , } @llvm.aarch64.sme.fp8.scale.single.x4.nxv2f64( [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]], [[OP2:%.*]]) +// CPP-CHECK-NEXT: ret { , , , } [[TMP8]] // svfloat64x4_t test_svscale_single_f64_x4(svfloat64x4_t op1, svint64_t op2) __arm_streaming { @@ -218,29 +182,29 @@ svfloat64x4_t test_svscale_single_f64_x4(svfloat64x4_t op1, svint64_t op2) __arm // Multi x2 // CHECK-LABEL: @test_svscale_f16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[OP1:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[OP1]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[OP2:%.*]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[OP2]], i64 8) -// CHECK-NEXT: [[TMP4:%.*]] = tail call { , } @llvm.aarch64.sme.fp8.scale.x2.nxv8f16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } 
[[TMP4]], 0 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP5]], i64 0) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , } [[TMP4]], 1 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP6]], [[TMP7]], i64 8) -// CHECK-NEXT: ret [[TMP8]] +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP1_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP1_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , } poison, [[OP2_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } [[TMP2]], [[OP2_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , } [[TMP3]], 0 +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , } [[TMP3]], 1 +// CHECK-NEXT: [[TMP8:%.*]] = tail call { , } @llvm.aarch64.sme.fp8.scale.x2.nxv8f16( [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) +// CHECK-NEXT: ret { , } [[TMP8]] // // CPP-CHECK-LABEL: @_Z19test_svscale_f16_x213svfloat16x2_t11svint16x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[OP1:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[OP1]], i64 8) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[OP2:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[OP2]], i64 8) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , } @llvm.aarch64.sme.fp8.scale.x2.nxv8f16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP4]], 0 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP5]], i64 0) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , } [[TMP4]], 1 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP6]], [[TMP7]], i64 8) -// CPP-CHECK-NEXT: ret [[TMP8]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP1_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP1_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , } poison, [[OP2_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } [[TMP2]], [[OP2_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , } [[TMP3]], 0 +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , } [[TMP3]], 1 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call { , } @llvm.aarch64.sme.fp8.scale.x2.nxv8f16( [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) +// CPP-CHECK-NEXT: ret { , } [[TMP8]] // svfloat16x2_t test_svscale_f16_x2(svfloat16x2_t op1, svint16x2_t op2) __arm_streaming { @@ -249,29 +213,29 @@ svfloat16x2_t test_svscale_f16_x2(svfloat16x2_t op1, svint16x2_t op2) __arm_stre // CHECK-LABEL: @test_svscale_f32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[OP1:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[OP1]], i64 4) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[OP2:%.*]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[OP2]], i64 4) -// CHECK-NEXT: [[TMP4:%.*]] = tail call { , } 
@llvm.aarch64.sme.fp8.scale.x2.nxv4f32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP4]], 0 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP5]], i64 0) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , } [[TMP4]], 1 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP6]], [[TMP7]], i64 4) -// CHECK-NEXT: ret [[TMP8]] +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP1_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP1_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , } poison, [[OP2_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } [[TMP2]], [[OP2_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , } [[TMP3]], 0 +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , } [[TMP3]], 1 +// CHECK-NEXT: [[TMP8:%.*]] = tail call { , } @llvm.aarch64.sme.fp8.scale.x2.nxv4f32( [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) +// CHECK-NEXT: ret { , } [[TMP8]] // // CPP-CHECK-LABEL: @_Z19test_svscale_f32_x213svfloat32x2_t11svint32x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[OP1:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[OP1]], i64 4) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[OP2:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[OP2]], i64 4) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , } @llvm.aarch64.sme.fp8.scale.x2.nxv4f32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP4]], 0 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP5]], i64 0) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , } [[TMP4]], 1 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP6]], [[TMP7]], i64 4) -// CPP-CHECK-NEXT: ret [[TMP8]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP1_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP1_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , } poison, [[OP2_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } [[TMP2]], [[OP2_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , } [[TMP3]], 0 +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , } [[TMP3]], 1 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call { , } @llvm.aarch64.sme.fp8.scale.x2.nxv4f32( [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) +// CPP-CHECK-NEXT: ret { , } [[TMP8]] // svfloat32x2_t test_svscale_f32_x2(svfloat32x2_t op1, svint32x2_t op2) __arm_streaming { @@ -280,29 +244,29 @@ svfloat32x2_t test_svscale_f32_x2(svfloat32x2_t op1, svint32x2_t op2) __arm_stre // CHECK-LABEL: @test_svscale_f64_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[OP1:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[OP1]], i64 2) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[OP2:%.*]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = tail 
call @llvm.vector.extract.nxv2i64.nxv4i64( [[OP2]], i64 2) -// CHECK-NEXT: [[TMP4:%.*]] = tail call { , } @llvm.aarch64.sme.fp8.scale.x2.nxv2f64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP4]], 0 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP5]], i64 0) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , } [[TMP4]], 1 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP6]], [[TMP7]], i64 2) -// CHECK-NEXT: ret [[TMP8]] +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP1_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP1_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , } poison, [[OP2_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } [[TMP2]], [[OP2_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , } [[TMP3]], 0 +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , } [[TMP3]], 1 +// CHECK-NEXT: [[TMP8:%.*]] = tail call { , } @llvm.aarch64.sme.fp8.scale.x2.nxv2f64( [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) +// CHECK-NEXT: ret { , } [[TMP8]] // // CPP-CHECK-LABEL: @_Z19test_svscale_f64_x213svfloat64x2_t11svint64x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[OP1:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[OP1]], i64 2) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[OP2:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[OP2]], i64 2) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , } @llvm.aarch64.sme.fp8.scale.x2.nxv2f64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP4]], 0 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP5]], i64 0) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , } [[TMP4]], 1 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP6]], [[TMP7]], i64 2) -// CPP-CHECK-NEXT: ret [[TMP8]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP1_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP1_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , } poison, [[OP2_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , } [[TMP2]], [[OP2_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , } [[TMP3]], 0 +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , } [[TMP3]], 1 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call { , } @llvm.aarch64.sme.fp8.scale.x2.nxv2f64( [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) +// CPP-CHECK-NEXT: ret { , } [[TMP8]] // svfloat64x2_t test_svscale_f64_x2(svfloat64x2_t op1, svint64x2_t op2) __arm_streaming { @@ -312,45 +276,45 @@ svfloat64x2_t test_svscale_f64_x2(svfloat64x2_t op1, svint64x2_t op2) __arm_stre // Multi x4 // CHECK-LABEL: @test_svscale_f16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[OP1:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[OP1]], i64 8) -// CHECK-NEXT: 
[[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[OP1]], i64 16) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[OP1]], i64 24) -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[OP2:%.*]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[OP2]], i64 8) -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[OP2]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[OP2]], i64 24) -// CHECK-NEXT: [[TMP8:%.*]] = tail call { , , , } @llvm.aarch64.sme.fp8.scale.x4.nxv8f16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP8]], 0 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP9]], i64 0) -// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP8]], 1 -// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP10]], [[TMP11]], i64 8) -// CHECK-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP8]], 2 -// CHECK-NEXT: [[TMP14:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP12]], [[TMP13]], i64 16) -// CHECK-NEXT: [[TMP15:%.*]] = extractvalue { , , , } [[TMP8]], 3 -// CHECK-NEXT: [[TMP16:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP14]], [[TMP15]], i64 24) -// CHECK-NEXT: ret [[TMP16]] +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP1_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP1_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP1_COERCE2:%.*]], 2 +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP1_COERCE3:%.*]], 3 +// CHECK-NEXT: [[TMP4:%.*]] = insertvalue { , , , } poison, [[OP2_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP5:%.*]] = insertvalue { , , , } [[TMP4]], [[OP2_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , , , } [[TMP5]], [[OP2_COERCE2:%.*]], 2 +// CHECK-NEXT: [[TMP7:%.*]] = insertvalue { , , , } [[TMP6]], [[OP2_COERCE3:%.*]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CHECK-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CHECK-NEXT: [[TMP12:%.*]] = extractvalue { , , , } [[TMP7]], 0 +// CHECK-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP7]], 1 +// CHECK-NEXT: [[TMP14:%.*]] = extractvalue { , , , } [[TMP7]], 2 +// CHECK-NEXT: [[TMP15:%.*]] = extractvalue { , , , } [[TMP7]], 3 +// CHECK-NEXT: [[TMP16:%.*]] = tail call { , , , } @llvm.aarch64.sme.fp8.scale.x4.nxv8f16( [[TMP8]], [[TMP9]], [[TMP10]], [[TMP11]], [[TMP12]], [[TMP13]], [[TMP14]], [[TMP15]]) +// CHECK-NEXT: ret { , , , } [[TMP16]] // // CPP-CHECK-LABEL: @_Z19test_svscale_f16_x413svfloat16x4_t11svint16x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[OP1:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[OP1]], i64 8) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[OP1]], i64 16) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[OP1]], i64 24) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[OP2:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = 
tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[OP2]], i64 8) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[OP2]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[OP2]], i64 24) -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call { , , , } @llvm.aarch64.sme.fp8.scale.x4.nxv8f16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP8]], 0 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP9]], i64 0) -// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP8]], 1 -// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP10]], [[TMP11]], i64 8) -// CPP-CHECK-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP8]], 2 -// CPP-CHECK-NEXT: [[TMP14:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP12]], [[TMP13]], i64 16) -// CPP-CHECK-NEXT: [[TMP15:%.*]] = extractvalue { , , , } [[TMP8]], 3 -// CPP-CHECK-NEXT: [[TMP16:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP14]], [[TMP15]], i64 24) -// CPP-CHECK-NEXT: ret [[TMP16]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP1_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP1_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP1_COERCE2:%.*]], 2 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP1_COERCE3:%.*]], 3 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = insertvalue { , , , } poison, [[OP2_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = insertvalue { , , , } [[TMP4]], [[OP2_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , , , } [[TMP5]], [[OP2_COERCE2:%.*]], 2 +// CPP-CHECK-NEXT: [[TMP7:%.*]] = insertvalue { , , , } [[TMP6]], [[OP2_COERCE3:%.*]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CPP-CHECK-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CPP-CHECK-NEXT: [[TMP12:%.*]] = extractvalue { , , , } [[TMP7]], 0 +// CPP-CHECK-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP7]], 1 +// CPP-CHECK-NEXT: [[TMP14:%.*]] = extractvalue { , , , } [[TMP7]], 2 +// CPP-CHECK-NEXT: [[TMP15:%.*]] = extractvalue { , , , } [[TMP7]], 3 +// CPP-CHECK-NEXT: [[TMP16:%.*]] = tail call { , , , } @llvm.aarch64.sme.fp8.scale.x4.nxv8f16( [[TMP8]], [[TMP9]], [[TMP10]], [[TMP11]], [[TMP12]], [[TMP13]], [[TMP14]], [[TMP15]]) +// CPP-CHECK-NEXT: ret { , , , } [[TMP16]] // svfloat16x4_t test_svscale_f16_x4(svfloat16x4_t op1, svint16x4_t op2) __arm_streaming { @@ -359,45 +323,45 @@ svfloat16x4_t test_svscale_f16_x4(svfloat16x4_t op1, svint16x4_t op2) __arm_stre // CHECK-LABEL: @test_svscale_f32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[OP1:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[OP1]], i64 4) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[OP1]], i64 8) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[OP1]], i64 12) -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[OP2:%.*]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[OP2]], i64 
4) -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[OP2]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[OP2]], i64 12) -// CHECK-NEXT: [[TMP8:%.*]] = tail call { , , , } @llvm.aarch64.sme.fp8.scale.x4.nxv4f32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP8]], 0 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP9]], i64 0) -// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP8]], 1 -// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP10]], [[TMP11]], i64 4) -// CHECK-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP8]], 2 -// CHECK-NEXT: [[TMP14:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP12]], [[TMP13]], i64 8) -// CHECK-NEXT: [[TMP15:%.*]] = extractvalue { , , , } [[TMP8]], 3 -// CHECK-NEXT: [[TMP16:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP14]], [[TMP15]], i64 12) -// CHECK-NEXT: ret [[TMP16]] +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP1_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP1_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP1_COERCE2:%.*]], 2 +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP1_COERCE3:%.*]], 3 +// CHECK-NEXT: [[TMP4:%.*]] = insertvalue { , , , } poison, [[OP2_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP5:%.*]] = insertvalue { , , , } [[TMP4]], [[OP2_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , , , } [[TMP5]], [[OP2_COERCE2:%.*]], 2 +// CHECK-NEXT: [[TMP7:%.*]] = insertvalue { , , , } [[TMP6]], [[OP2_COERCE3:%.*]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CHECK-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CHECK-NEXT: [[TMP12:%.*]] = extractvalue { , , , } [[TMP7]], 0 +// CHECK-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP7]], 1 +// CHECK-NEXT: [[TMP14:%.*]] = extractvalue { , , , } [[TMP7]], 2 +// CHECK-NEXT: [[TMP15:%.*]] = extractvalue { , , , } [[TMP7]], 3 +// CHECK-NEXT: [[TMP16:%.*]] = tail call { , , , } @llvm.aarch64.sme.fp8.scale.x4.nxv4f32( [[TMP8]], [[TMP9]], [[TMP10]], [[TMP11]], [[TMP12]], [[TMP13]], [[TMP14]], [[TMP15]]) +// CHECK-NEXT: ret { , , , } [[TMP16]] // // CPP-CHECK-LABEL: @_Z19test_svscale_f32_x413svfloat32x4_t11svint32x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[OP1:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[OP1]], i64 4) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[OP1]], i64 8) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[OP1]], i64 12) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[OP2:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[OP2]], i64 4) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[OP2]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[OP2]], i64 12) -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call { , , , } @llvm.aarch64.sme.fp8.scale.x4.nxv4f32( [[TMP0]], 
[[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP8]], 0 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP9]], i64 0) -// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP8]], 1 -// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP10]], [[TMP11]], i64 4) -// CPP-CHECK-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP8]], 2 -// CPP-CHECK-NEXT: [[TMP14:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP12]], [[TMP13]], i64 8) -// CPP-CHECK-NEXT: [[TMP15:%.*]] = extractvalue { , , , } [[TMP8]], 3 -// CPP-CHECK-NEXT: [[TMP16:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP14]], [[TMP15]], i64 12) -// CPP-CHECK-NEXT: ret [[TMP16]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP1_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP1_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP1_COERCE2:%.*]], 2 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP1_COERCE3:%.*]], 3 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = insertvalue { , , , } poison, [[OP2_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = insertvalue { , , , } [[TMP4]], [[OP2_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , , , } [[TMP5]], [[OP2_COERCE2:%.*]], 2 +// CPP-CHECK-NEXT: [[TMP7:%.*]] = insertvalue { , , , } [[TMP6]], [[OP2_COERCE3:%.*]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CPP-CHECK-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CPP-CHECK-NEXT: [[TMP12:%.*]] = extractvalue { , , , } [[TMP7]], 0 +// CPP-CHECK-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP7]], 1 +// CPP-CHECK-NEXT: [[TMP14:%.*]] = extractvalue { , , , } [[TMP7]], 2 +// CPP-CHECK-NEXT: [[TMP15:%.*]] = extractvalue { , , , } [[TMP7]], 3 +// CPP-CHECK-NEXT: [[TMP16:%.*]] = tail call { , , , } @llvm.aarch64.sme.fp8.scale.x4.nxv4f32( [[TMP8]], [[TMP9]], [[TMP10]], [[TMP11]], [[TMP12]], [[TMP13]], [[TMP14]], [[TMP15]]) +// CPP-CHECK-NEXT: ret { , , , } [[TMP16]] // svfloat32x4_t test_svscale_f32_x4(svfloat32x4_t op1, svint32x4_t op2) __arm_streaming { @@ -406,45 +370,45 @@ svfloat32x4_t test_svscale_f32_x4(svfloat32x4_t op1, svint32x4_t op2) __arm_stre // CHECK-LABEL: @test_svscale_f64_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[OP1:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[OP1]], i64 2) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[OP1]], i64 4) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[OP1]], i64 6) -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[OP2:%.*]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[OP2]], i64 2) -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[OP2]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[OP2]], i64 6) -// CHECK-NEXT: [[TMP8:%.*]] = tail call { , , , } @llvm.aarch64.sme.fp8.scale.x4.nxv2f64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) -// CHECK-NEXT: 
[[TMP9:%.*]] = extractvalue { , , , } [[TMP8]], 0 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP9]], i64 0) -// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP8]], 1 -// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP10]], [[TMP11]], i64 2) -// CHECK-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP8]], 2 -// CHECK-NEXT: [[TMP14:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP12]], [[TMP13]], i64 4) -// CHECK-NEXT: [[TMP15:%.*]] = extractvalue { , , , } [[TMP8]], 3 -// CHECK-NEXT: [[TMP16:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP14]], [[TMP15]], i64 6) -// CHECK-NEXT: ret [[TMP16]] +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP1_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP1_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP1_COERCE2:%.*]], 2 +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP1_COERCE3:%.*]], 3 +// CHECK-NEXT: [[TMP4:%.*]] = insertvalue { , , , } poison, [[OP2_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP5:%.*]] = insertvalue { , , , } [[TMP4]], [[OP2_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , , , } [[TMP5]], [[OP2_COERCE2:%.*]], 2 +// CHECK-NEXT: [[TMP7:%.*]] = insertvalue { , , , } [[TMP6]], [[OP2_COERCE3:%.*]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CHECK-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CHECK-NEXT: [[TMP12:%.*]] = extractvalue { , , , } [[TMP7]], 0 +// CHECK-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP7]], 1 +// CHECK-NEXT: [[TMP14:%.*]] = extractvalue { , , , } [[TMP7]], 2 +// CHECK-NEXT: [[TMP15:%.*]] = extractvalue { , , , } [[TMP7]], 3 +// CHECK-NEXT: [[TMP16:%.*]] = tail call { , , , } @llvm.aarch64.sme.fp8.scale.x4.nxv2f64( [[TMP8]], [[TMP9]], [[TMP10]], [[TMP11]], [[TMP12]], [[TMP13]], [[TMP14]], [[TMP15]]) +// CHECK-NEXT: ret { , , , } [[TMP16]] // // CPP-CHECK-LABEL: @_Z19test_svscale_f64_x413svfloat64x4_t11svint64x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[OP1:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[OP1]], i64 2) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[OP1]], i64 4) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[OP1]], i64 6) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[OP2:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[OP2]], i64 2) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[OP2]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[OP2]], i64 6) -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call { , , , } @llvm.aarch64.sme.fp8.scale.x4.nxv2f64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP8]], 0 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP9]], i64 0) -// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP8]], 1 -// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( 
[[TMP10]], [[TMP11]], i64 2) -// CPP-CHECK-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP8]], 2 -// CPP-CHECK-NEXT: [[TMP14:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP12]], [[TMP13]], i64 4) -// CPP-CHECK-NEXT: [[TMP15:%.*]] = extractvalue { , , , } [[TMP8]], 3 -// CPP-CHECK-NEXT: [[TMP16:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP14]], [[TMP15]], i64 6) -// CPP-CHECK-NEXT: ret [[TMP16]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP1_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP1_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP1_COERCE2:%.*]], 2 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP1_COERCE3:%.*]], 3 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = insertvalue { , , , } poison, [[OP2_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = insertvalue { , , , } [[TMP4]], [[OP2_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , , , } [[TMP5]], [[OP2_COERCE2:%.*]], 2 +// CPP-CHECK-NEXT: [[TMP7:%.*]] = insertvalue { , , , } [[TMP6]], [[OP2_COERCE3:%.*]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CPP-CHECK-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CPP-CHECK-NEXT: [[TMP12:%.*]] = extractvalue { , , , } [[TMP7]], 0 +// CPP-CHECK-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP7]], 1 +// CPP-CHECK-NEXT: [[TMP14:%.*]] = extractvalue { , , , } [[TMP7]], 2 +// CPP-CHECK-NEXT: [[TMP15:%.*]] = extractvalue { , , , } [[TMP7]], 3 +// CPP-CHECK-NEXT: [[TMP16:%.*]] = tail call { , , , } @llvm.aarch64.sme.fp8.scale.x4.nxv2f64( [[TMP8]], [[TMP9]], [[TMP10]], [[TMP11]], [[TMP12]], [[TMP13]], [[TMP14]], [[TMP15]]) +// CPP-CHECK-NEXT: ret { , , , } [[TMP16]] // svfloat64x4_t test_svscale_f64_x4(svfloat64x4_t op1, svint64x4_t op2) __arm_streaming { From d01e336336f2b7fb4137e3dcc7d5c0b06ca1f3d6 Mon Sep 17 00:00:00 2001 From: Rainer Orth Date: Wed, 25 Sep 2024 18:15:45 +0200 Subject: [PATCH 191/212] [Driver] Enable ASan on Solaris/SPARC (#107403) Once PR #107223 lands, ASan can be enabled on Solaris/SPARC. This patch does just that. As on Solaris/x86, the dynamic ASan runtime lib needs to be linked with `-z now` to avoid an `AsanInitInternal` cycle. Tested on `sparcv9-sun-solaris2.11` and `sparc64-unknown-linux-gnu`. --- clang/lib/Driver/ToolChains/Solaris.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/clang/lib/Driver/ToolChains/Solaris.cpp b/clang/lib/Driver/ToolChains/Solaris.cpp index cf39038dcac37..fd3232b7c1b06 100644 --- a/clang/lib/Driver/ToolChains/Solaris.cpp +++ b/clang/lib/Driver/ToolChains/Solaris.cpp @@ -266,8 +266,7 @@ void solaris::Linker::ConstructJob(Compilation &C, const JobAction &JA, } } // Avoid AsanInitInternal cycle, Issue #64126. 
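// A hedged usage sketch (invocation and output abridged; the exact spellings
// are assumed, not taken from this patch): with the change below, an ASan
// link on Solaris/SPARC also gets eager binding for the shared runtime, e.g.
//
//   $ clang --target=sparcv9-sun-solaris2.11 -fsanitize=address a.c -###
//   ... ld ... "-z" "now" ... libclang_rt.asan*.so ...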
- if (ToolChain.getTriple().isX86() && SA.needsSharedRt() && - SA.needsAsanRt()) { + if (SA.needsSharedRt() && SA.needsAsanRt()) { CmdArgs.push_back("-z"); CmdArgs.push_back("now"); } @@ -334,10 +333,11 @@ Solaris::Solaris(const Driver &D, const llvm::Triple &Triple, } SanitizerMask Solaris::getSupportedSanitizers() const { + const bool IsSparc = getTriple().getArch() == llvm::Triple::sparc; const bool IsX86 = getTriple().getArch() == llvm::Triple::x86; SanitizerMask Res = ToolChain::getSupportedSanitizers(); - // FIXME: Omit X86_64 until 64-bit support is figured out. - if (IsX86) { + // FIXME: Omit SparcV9 and X86_64 until 64-bit support is figured out. + if (IsSparc || IsX86) { Res |= SanitizerKind::Address; Res |= SanitizerKind::PointerCompare; Res |= SanitizerKind::PointerSubtract; From cebb7c010854e39a77065cfd681db91a79e7ce15 Mon Sep 17 00:00:00 2001 From: Thomas Köppe Date: Wed, 25 Sep 2024 17:25:46 +0100 Subject: [PATCH 192/212] [clang-tidy] modernize-use-nullptr matches "NULL" in templates (#109169) Make the modernize-use-nullptr matcher also match "NULL", but not "0", when it appears on a substituted type of a template specialization. Previously, any matches on a substituted type were excluded, but this meant that a situation like the following was not diagnosed:
```c++
template <typename T>
struct X {
  T val;
  X() { val = NULL; } // should diagnose
};
```
When the user says `NULL`, we expect that the destination type is always meant to be a pointer type, so this should be converted to `nullptr`. By contrast, we do not propose changing a literal `0` in that case, which appears as an initializer of both pointer and integer specializations in reasonable real code. (If `NULL` is used erroneously in such a situation, it should be changed to `0` or `{}`.) --- .../clang-tidy/modernize/UseNullptrCheck.cpp | 12 ++++++++- clang-tools-extra/docs/ReleaseNotes.rst | 4 +++ .../checkers/modernize/use-nullptr.cpp | 25 +++++++++++++++++++ 3 files changed, 40 insertions(+), 1 deletion(-) diff --git a/clang-tools-extra/clang-tidy/modernize/UseNullptrCheck.cpp b/clang-tools-extra/clang-tidy/modernize/UseNullptrCheck.cpp index 6a003a347bada..108717e151b57 100644 --- a/clang-tools-extra/clang-tidy/modernize/UseNullptrCheck.cpp +++ b/clang-tools-extra/clang-tidy/modernize/UseNullptrCheck.cpp @@ -35,10 +35,20 @@ AST_MATCHER(Type, sugaredNullptrType) { /// to null within. /// Finding sequences of explicit casts is necessary so that an entire sequence /// can be replaced instead of just the inner-most implicit cast. +/// +/// TODO/NOTE: The second "anyOf" below discards matches on a substituted type, +/// since we don't know if that would _always_ be a pointer type for all other +/// specializations, unless the expression was "__null", in which case we assume +/// that all specializations are expected to be for pointer types. Ideally this +/// would check for the "NULL" macro instead, but that'd be harder to express. +/// In practice, "NULL" is often defined as "__null", and this is a useful +/// condition.
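// A hedged illustration of the heuristic above (hypothetical user code, not
// part of this check's sources): with the new matcher, f() is diagnosed and
// rewritten to use nullptr, while g() is deliberately left alone.
namespace use_nullptr_illustration {
template <typename T> struct Holder {
  T p;
  void f() { p = __null; } // __null implies a pointer target: fixed to nullptr
  void g() { p = 0; }      // 0 can be a legitimate integer initializer: kept
};
} // namespace use_nullptr_illustration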
StatementMatcher makeCastSequenceMatcher(llvm::ArrayRef<StringRef> NameList) { auto ImplicitCastToNull = implicitCastExpr( anyOf(hasCastKind(CK_NullToPointer), hasCastKind(CK_NullToMemberPointer)), - unless(hasImplicitDestinationType(qualType(substTemplateTypeParmType()))), + anyOf(hasSourceExpression(gnuNullExpr()), + unless(hasImplicitDestinationType( + qualType(substTemplateTypeParmType())))), unless(hasSourceExpression(hasType(sugaredNullptrType()))), unless(hasImplicitDestinationType( qualType(matchers::matchesAnyListedTypeName(NameList))))); diff --git a/clang-tools-extra/docs/ReleaseNotes.rst b/clang-tools-extra/docs/ReleaseNotes.rst index 44b1f8c07edd3..9a130a23b6e89 100644 --- a/clang-tools-extra/docs/ReleaseNotes.rst +++ b/clang-tools-extra/docs/ReleaseNotes.rst @@ -166,6 +166,10 @@ Changes in existing checks a false positive when only an implicit conversion happened inside an initializer list. +- Improved :doc:`modernize-use-nullptr + <clang-tidy/checks/modernize/use-nullptr>` check to also recognize + ``NULL``/``__null`` (but not ``0``) when used with a templated type. + - Improved :doc:`modernize-use-std-print <clang-tidy/checks/modernize/use-std-print>` check to support replacing member function calls too. diff --git a/clang-tools-extra/test/clang-tidy/checkers/modernize/use-nullptr.cpp b/clang-tools-extra/test/clang-tidy/checkers/modernize/use-nullptr.cpp index 7bc0925136aa8..2c36349da896c 100644 --- a/clang-tools-extra/test/clang-tidy/checkers/modernize/use-nullptr.cpp +++ b/clang-tools-extra/test/clang-tidy/checkers/modernize/use-nullptr.cpp @@ -84,6 +84,31 @@ void test_macro_expansion4() { #undef MY_NULL } +template <typename T> struct pear { + // If you say __null (or NULL), we assume that T will always be a pointer + // type, so we suggest replacing it with nullptr. (We only check __null here, + // because in this test NULL is defined as 0, but in real library implementations + // it is often defined as __null, and the check will catch it.) + void f() { x = __null; } + // CHECK-MESSAGES: :[[@LINE-1]]:18: warning: use nullptr [modernize-use-nullptr] + // CHECK-FIXES: x = nullptr; + + // But if you say 0, we allow the possibility that T can be used with integral + // and pointer types, and "0" is an acceptable initializer (even if "{}" might + // be even better). + void g() { y = 0; } + // CHECK-MESSAGES-NOT: :[[@LINE-1]] warning: use nullptr + + T x; + T y; +}; +void test_templated() { + pear<int *> p; + p.f(); + p.g(); + dummy(p.x); +} + #define IS_EQ(x, y) if (x != y) return; void test_macro_args() { int i = 0; From 78c6506543dee13c9335edc5c85bc73c4853fbd7 Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Wed, 25 Sep 2024 12:40:14 -0400 Subject: [PATCH 193/212] [libc++] Disable the clang-tidy checks to get CI back (#109989) The CI has been a complete mess for the past week, and the only thing preventing it from coming back is the clang-tidy checks. Disable them (as a total hack) to get CI back. --- libcxx/test/tools/clang_tidy_checks/CMakeLists.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/libcxx/test/tools/clang_tidy_checks/CMakeLists.txt b/libcxx/test/tools/clang_tidy_checks/CMakeLists.txt index 5de2d44994ad0..0e1d3506a9973 100644 --- a/libcxx/test/tools/clang_tidy_checks/CMakeLists.txt +++ b/libcxx/test/tools/clang_tidy_checks/CMakeLists.txt @@ -1,3 +1,5 @@ +# TODO: Re-enable the tests once the CI is back under control +return() # The find_package changes these variables. This leaves the build in an odd # state.
Calling cmake a second time tries to write site config information in From 1c1bb7749860b4265c002528cbfe4b6c623b934c Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Wed, 25 Sep 2024 12:41:09 -0400 Subject: [PATCH 194/212] [libc++abi] Fix issue when building the demangler in C++11 Captures with an initializer only work in C++14. This broke the C++11 CI but wasn't noticed because our CI was down. --- libcxxabi/src/demangle/ItaniumDemangle.h | 3 ++- llvm/include/llvm/Demangle/ItaniumDemangle.h | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/libcxxabi/src/demangle/ItaniumDemangle.h b/libcxxabi/src/demangle/ItaniumDemangle.h index 723bdfe324b14..501d0b6fdfcd1 100644 --- a/libcxxabi/src/demangle/ItaniumDemangle.h +++ b/libcxxabi/src/demangle/ItaniumDemangle.h @@ -2632,7 +2632,8 @@ template struct NodeKind; #include "ItaniumNodes.def" inline bool NodeArray::printAsString(OutputBuffer &OB) const { - auto Fail = [&OB, StartPos = OB.getCurrentPosition()] { + auto StartPos = OB.getCurrentPosition(); + auto Fail = [&OB, StartPos] { OB.setCurrentPosition(StartPos); return false; }; diff --git a/llvm/include/llvm/Demangle/ItaniumDemangle.h b/llvm/include/llvm/Demangle/ItaniumDemangle.h index 9ada4d747b1ce..56ff3cfb148f0 100644 --- a/llvm/include/llvm/Demangle/ItaniumDemangle.h +++ b/llvm/include/llvm/Demangle/ItaniumDemangle.h @@ -2632,7 +2632,8 @@ template struct NodeKind; #include "ItaniumNodes.def" inline bool NodeArray::printAsString(OutputBuffer &OB) const { - auto Fail = [&OB, StartPos = OB.getCurrentPosition()] { + auto StartPos = OB.getCurrentPosition(); + auto Fail = [&OB, StartPos] { OB.setCurrentPosition(StartPos); return false; }; From 660ddb3a9357e766eb628abb8ea8c0776951d0db Mon Sep 17 00:00:00 2001 From: Edd Dawson Date: Wed, 25 Sep 2024 18:08:32 +0100 Subject: [PATCH 195/212] [PS4,PS5][Driver] Pass `-L/target/lib -L.` to linker (#109796) The proprietary PS4 linker implicitly adds `=/target/lib` and `.` as library search paths. This behaviour was added to the PS5 linker via a downstream patch in LLD. This really belongs in the driver, instead. This change adds the driver behaviour to allow removal of the downstream patch in LLD. There are no plans to update the PS4 linker behaviour in the analogous way, so do not pass the same search paths to the PS4 linker. SIE tracker: TOOLCHAIN-16704 --- clang/lib/Driver/ToolChains/PS4CPU.cpp | 6 ++++++ clang/test/Driver/ps5-linker.c | 24 ++++++++++++++++++++++++ 2 files changed, 30 insertions(+) diff --git a/clang/lib/Driver/ToolChains/PS4CPU.cpp b/clang/lib/Driver/ToolChains/PS4CPU.cpp index db77d058bcc59..7c028f18c0308 100644 --- a/clang/lib/Driver/ToolChains/PS4CPU.cpp +++ b/clang/lib/Driver/ToolChains/PS4CPU.cpp @@ -186,6 +186,9 @@ void tools::PS4cpu::Linker::ConstructJob(Compilation &C, const JobAction &JA, if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nodefaultlibs)) TC.addSanitizerArgs(Args, CmdArgs, "-l", ""); + // Other drivers typically add library search paths (`-L`) here via + // TC.AddFilePathLibArgs(). We don't do that on PS4 as the PS4 linker + // searches those locations by default. 
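// A hedged usage sketch (paths and output abridged; not generated by this
// patch): on PS5, by contrast, the driver now emits the search paths
// explicitly, so a link line gains -L entries such as
//
//   $ clang --target=x86_64-sie-ps5 main.c -###
//   ... ld.lld ... "-L<SDK>/target/lib" "-L." ...
//
// where <SDK>/target/lib is added only when that directory exists and no
// --sysroot is given, per the test updates further below.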
Args.addAllArgs(CmdArgs, {options::OPT_L, options::OPT_T_Group, options::OPT_s, options::OPT_t}); @@ -290,6 +293,7 @@ void tools::PS5cpu::Linker::ConstructJob(Compilation &C, const JobAction &JA, if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nodefaultlibs)) TC.addSanitizerArgs(Args, CmdArgs, "-l", ""); + TC.AddFilePathLibArgs(Args, CmdArgs); Args.addAllArgs(CmdArgs, {options::OPT_L, options::OPT_T_Group, options::OPT_s, options::OPT_t}); @@ -382,6 +386,8 @@ toolchains::PS4PS5Base::PS4PS5Base(const Driver &D, const llvm::Triple &Triple, llvm::sys::path::append(Dir, "target/include"); CheckSDKPartExists(Dir, "system headers"); } + + getFilePaths().push_back("."); } void toolchains::PS4PS5Base::AddClangSystemIncludeArgs( diff --git a/clang/test/Driver/ps5-linker.c b/clang/test/Driver/ps5-linker.c index c0cf0b864028c..4ae65963e361a 100644 --- a/clang/test/Driver/ps5-linker.c +++ b/clang/test/Driver/ps5-linker.c @@ -46,3 +46,27 @@ // CHECK-SYSROOT: {{ld(\.exe)?}}" // CHECK-SYSROOT-SAME: "--sysroot=mysdk" + +// Test that "." is always added to library search paths. This is long-standing +// behavior, unique to PlayStation toolchains. + +// RUN: %clang --target=x64_64-sie-ps5 %s -### 2>&1 | FileCheck --check-prefixes=CHECK-LDOT %s + +// CHECK-LDOT: {{ld(\.exe)?}}" +// CHECK-LDOT-SAME: "-L." + +// Test that /target/lib is added to library search paths, if it +// exists and no --sysroot is specified. + +// RUN: rm -rf %t.dir && mkdir %t.dir +// RUN: env SCE_PROSPERO_SDK_DIR=%t.dir %clang --target=x64_64-sie-ps5 %s -### 2>&1 | FileCheck --check-prefixes=CHECK-NO-TARGETLIB %s +// RUN: env SCE_PROSPERO_SDK_DIR=%t.dir %clang --target=x64_64-sie-ps5 %s -### --sysroot=%t.dir 2>&1 | FileCheck --check-prefixes=CHECK-NO-TARGETLIB %s + +// CHECK-NO-TARGETLIB: {{ld(\.exe)?}}" +// CHECK-NO-TARGETLIB-NOT: "-L{{.*[/\\]}}target/lib" + +// RUN: mkdir -p %t.dir/target/lib +// RUN: env SCE_PROSPERO_SDK_DIR=%t.dir %clang --target=x64_64-sie-ps5 %s -### 2>&1 | FileCheck --check-prefixes=CHECK-TARGETLIB %s + +// CHECK-TARGETLIB: {{ld(\.exe)?}}" +// CHECK-TARGETLIB-SAME: "-L{{.*[/\\]}}target/lib" From a280275cff497f96492d7c1094ba30309dbd3ad6 Mon Sep 17 00:00:00 2001 From: David CARLIER Date: Wed, 25 Sep 2024 18:11:02 +0100 Subject: [PATCH 196/212] [compiler-rt] Fix #83679 for macos sdk < 13.0 (#109946) --- .../sanitizer_common/sanitizer_platform_interceptors.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_platform_interceptors.h b/compiler-rt/lib/sanitizer_common/sanitizer_platform_interceptors.h index 05fa7e63268f2..d4cc380f641b8 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_platform_interceptors.h +++ b/compiler-rt/lib/sanitizer_common/sanitizer_platform_interceptors.h @@ -606,7 +606,13 @@ // FIXME: also available from musl 1.2.5 #define SANITIZER_INTERCEPT_PREADV2 (SI_LINUX && __GLIBC_PREREQ(2, 26)) #define SANITIZER_INTERCEPT_PWRITEV2 (SI_LINUX && __GLIBC_PREREQ(2, 26)) -#define SANITIZER_INTERCEPT_FREADLINK SI_MAC +#if defined(__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__) && \ + __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ < 130000 +# define SI_MAC_DEPLOYMENT_BELOW_13_00 1 +#else +# define SI_MAC_DEPLOYMENT_BELOW_13_00 0 +#endif +#define SANITIZER_INTERCEPT_FREADLINK (SI_MAC && !SI_MAC_DEPLOYMENT_BELOW_13_00) // This macro gives a way for downstream users to override the above // interceptor macros irrespective of the platform they are on. 
They have From b3b6141ba1105ad5b9712a9c93891003170c32ac Mon Sep 17 00:00:00 2001 From: Jonas Devlieghere Date: Wed, 25 Sep 2024 10:16:45 -0700 Subject: [PATCH 197/212] [lldb] Fix two formatv issues in LLDB_LOG (NFC) --- lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp | 2 +- lldb/source/Target/Target.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp b/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp index b0f49ebf2d2cb..264b2e8411407 100644 --- a/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp +++ b/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp @@ -9702,7 +9702,7 @@ ScratchTypeSystemClang::GetForTarget(Target &target, lldb::eLanguageTypeC, create_on_demand); if (auto err = type_system_or_err.takeError()) { LLDB_LOG_ERROR(GetLog(LLDBLog::Target), std::move(err), - "Couldn't get scratch TypeSystemClang"); + "Couldn't get scratch TypeSystemClang: {0}"); return nullptr; } auto ts_sp = *type_system_or_err; diff --git a/lldb/source/Target/Target.cpp b/lldb/source/Target/Target.cpp index 29e9efb83efeb..6123e5b9c2090 100644 --- a/lldb/source/Target/Target.cpp +++ b/lldb/source/Target/Target.cpp @@ -3617,7 +3617,7 @@ void Target::FinalizeFileActions(ProcessLaunchInfo &info) { if (info.GetFileActionForFD(STDERR_FILENO) == nullptr) err_file_spec = GetStandardErrorPath(); - LLDB_LOG(log, "target stdin='{0}', target stdout='{1}', stderr='{1}'", + LLDB_LOG(log, "target stdin='{0}', target stdout='{1}', stderr='{2}'", in_file_spec, out_file_spec, err_file_spec); if (in_file_spec) { From 72307ba615952ffa3be9be0d2b175b70e8c86710 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Wed, 25 Sep 2024 10:22:13 -0700 Subject: [PATCH 198/212] [ELF] Pass Ctx & to Driver --- lld/ELF/Driver.cpp | 53 ++++++++++++++++++++++++---------------------- 1 file changed, 28 insertions(+), 25 deletions(-) diff --git a/lld/ELF/Driver.cpp b/lld/ELF/Driver.cpp index 343fc4989fa4c..dcdd74ac74f5f 100644 --- a/lld/ELF/Driver.cpp +++ b/lld/ELF/Driver.cpp @@ -850,7 +850,7 @@ static ICFLevel getICF(opt::InputArgList &args) { return ICFLevel::All; } -static StripPolicy getStrip(opt::InputArgList &args) { +static StripPolicy getStrip(Ctx &ctx, opt::InputArgList &args) { if (args.hasArg(OPT_relocatable)) return StripPolicy::None; if (!ctx.arg.zSectionHeader) @@ -953,7 +953,7 @@ static std::pair<bool, bool> getPackDynRelocs(opt::InputArgList &args) { return {false, false}; } -static void readCallGraph(MemoryBufferRef mb) { +static void readCallGraph(Ctx &ctx, MemoryBufferRef mb) { // Build a map from symbol name to section DenseMap<StringRef, InputSectionBase *> map; for (ELFFileBase *file : ctx.objectFiles) @@ -1041,7 +1041,7 @@ processCallGraphRelocations(SmallVector<uint32_t, 32> &symbolIndices, return !symbolIndices.empty(); } -template <class ELFT> static void readCallGraphsFromObjectFiles() { +template <class ELFT> static void readCallGraphsFromObjectFiles(Ctx &ctx) { SmallVector<uint32_t, 32> symbolIndices; ArrayRef<typename ELFT::CGProfile> cgProfile; for (auto file : ctx.objectFiles) { @@ -1070,7 +1070,8 @@ template <class ELFT> static void readCallGraphsFromObjectFiles() { } template <class ELFT> -static void ltoValidateAllVtablesHaveTypeInfos(opt::InputArgList &args) { +static void ltoValidateAllVtablesHaveTypeInfos(Ctx &ctx, + opt::InputArgList &args) { DenseSet<StringRef> typeInfoSymbols; SmallSetVector vtableSymbols; auto processVtableAndTypeInfoSymbols = [&](StringRef name) { @@ -1184,7 +1185,8 @@ getOldNewOptionsExtra(opt::InputArgList &args, unsigned id) { } // Parse the symbol ordering file and warn for any duplicate entries.
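// A hedged usage sketch (file contents hypothetical; the warning wording is
// assumed from the warn() call this function makes): the ordering file parsed
// below is a plain list of symbol names, one per line, and a repeated name
// triggers a duplicate warning unless --no-warn-symbol-ordering is given.
//
//   $ cat order.txt
//   _start
//   hot_function
//   hot_function
//   $ ld.lld --symbol-ordering-file=order.txt main.o
//   ld.lld: warning: order.txt: duplicate ordered symbol: hot_function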
-static SmallVector<StringRef, 0> getSymbolOrderingFile(MemoryBufferRef mb) { +static SmallVector<StringRef, 0> getSymbolOrderingFile(Ctx &ctx, + MemoryBufferRef mb) { SetVector<StringRef, SmallVector<StringRef, 0>, DenseSet<CachedHashStringRef>> names; for (StringRef s : args::getLines(mb)) if (!names.insert(s) && ctx.arg.warnSymbolOrdering) @@ -1193,7 +1195,7 @@ static SmallVector<StringRef, 0> getSymbolOrderingFile(MemoryBufferRef mb) { return names.takeVector(); } -static bool getIsRela(opt::InputArgList &args) { +static bool getIsRela(Ctx &ctx, opt::InputArgList &args) { // The psABI specifies the default relocation entry format. bool rela = is_contained({EM_AARCH64, EM_AMDGPU, EM_HEXAGON, EM_LOONGARCH, EM_PPC, EM_PPC64, EM_RISCV, EM_S390, EM_X86_64}, @@ -1212,7 +1214,7 @@ static bool getIsRela(opt::InputArgList &args) { return rela; } -static void parseClangOption(StringRef opt, const Twine &msg) { +static void parseClangOption(Ctx &ctx, StringRef opt, const Twine &msg) { std::string err; raw_string_ostream os(err); @@ -1228,7 +1230,7 @@ static bool isValidReportString(StringRef arg) { } // Process a remap pattern 'from-glob=to-file'. -static bool remapInputs(StringRef line, const Twine &location) { +static bool remapInputs(Ctx &ctx, StringRef line, const Twine &location) { SmallVector<StringRef, 0> fields; line.split(fields, '='); if (fields.size() != 2 || fields[1].empty()) { @@ -1440,7 +1442,7 @@ static void readConfigs(Ctx &ctx, opt::InputArgList &args) { args::getInteger(args, OPT_split_stack_adjust_size, 16384); ctx.arg.zSectionHeader = getZFlag(args, "sectionheader", "nosectionheader", true); - ctx.arg.strip = getStrip(args); // needs zSectionHeader + ctx.arg.strip = getStrip(ctx, args); // needs zSectionHeader ctx.arg.sysroot = args.getLastArgValue(OPT_sysroot); ctx.arg.target1Rel = args.hasFlag(OPT_target1_rel, OPT_target1_abs, false); ctx.arg.target2 = getTarget2(args); @@ -1535,7 +1537,7 @@ static void readConfigs(Ctx &ctx, opt::InputArgList &args) { for (opt::Arg *arg : args.filtered(OPT_remap_inputs)) { StringRef value(arg->getValue()); - remapInputs(value, arg->getSpelling()); + remapInputs(ctx, value, arg->getSpelling()); } for (opt::Arg *arg : args.filtered(OPT_remap_inputs_file)) { StringRef filename(arg->getValue()); @@ -1544,7 +1546,7 @@ static void readConfigs(Ctx &ctx, opt::InputArgList &args) { continue; // Parse 'from-glob=to-file' lines, ignoring #-led comments. for (auto [lineno, line] : llvm::enumerate(args::getLines(*buffer))) - if (remapInputs(line, filename + ":" + Twine(lineno + 1))) + if (remapInputs(ctx, line, filename + ":" + Twine(lineno + 1))) break; } @@ -1637,11 +1639,12 @@ static void readConfigs(Ctx &ctx, opt::InputArgList &args) { // Parse LTO options. if (auto *arg = args.getLastArg(OPT_plugin_opt_mcpu_eq)) - parseClangOption(saver().save("-mcpu=" + StringRef(arg->getValue())), + parseClangOption(ctx, saver().save("-mcpu=" + StringRef(arg->getValue())), arg->getSpelling()); for (opt::Arg *arg : args.filtered(OPT_plugin_opt_eq_minus)) - parseClangOption(std::string("-") + arg->getValue(), arg->getSpelling()); + parseClangOption(ctx, std::string("-") + arg->getValue(), + arg->getSpelling()); // GCC collect2 passes -plugin-opt=path/to/lto-wrapper with an absolute or // relative path. Just ignore. If not ended with "lto-wrapper" (or // "lto-wrapper.exe" for Windows), consider it an error. @@ -1658,7 +1661,7 @@ static void readConfigs(Ctx &ctx, opt::InputArgList &args) { // Parse -mllvm options.
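// A hedged usage sketch (the flag chosen is illustrative): each -mllvm value
// is forwarded below through parseClangOption to LLVM's cl::opt parser, so
// backend options can be passed straight through the linker for LTO, e.g.
//
//   $ ld.lld -mllvm -stats main.o -o a.out
//
// An unrecognized value is diagnosed as a link-time error.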
for (const auto *arg : args.filtered(OPT_mllvm)) { - parseClangOption(arg->getValue(), arg->getSpelling()); + parseClangOption(ctx, arg->getValue(), arg->getSpelling()); ctx.arg.mllvmOpts.emplace_back(arg->getValue()); } @@ -1758,7 +1761,7 @@ static void readConfigs(Ctx &ctx, opt::InputArgList &args) { error("--symbol-ordering-file and --call-graph-order-file " "may not be used together"); if (std::optional<MemoryBufferRef> buffer = readFile(arg->getValue())) { - ctx.arg.symbolOrderingFile = getSymbolOrderingFile(*buffer); + ctx.arg.symbolOrderingFile = getSymbolOrderingFile(ctx, *buffer); // Also need to disable CallGraphProfileSort to prevent // LLD from ordering symbols with CGProfile ctx.arg.callGraphProfileSort = CGProfileSortKind::None; @@ -1851,7 +1854,7 @@ static void setConfigs(Ctx &ctx, opt::InputArgList &args) { // We pick the format for dynamic relocations according to the psABI for each // processor, but a contrary choice can be made if the dynamic loader // supports. - ctx.arg.isRela = getIsRela(args); + ctx.arg.isRela = getIsRela(ctx, args); // If the output uses REL relocations we must store the dynamic relocation // addends to the output sections. We also store addends for RELA relocations @@ -2146,7 +2149,7 @@ static DenseSet<StringRef> getExcludeLibs(opt::InputArgList &args) { // A special library name "ALL" means all archive files. // // This is not a popular option, but some programs such as bionic libc use it. -static void excludeLibs(opt::InputArgList &args) { +static void excludeLibs(Ctx &ctx, opt::InputArgList &args) { DenseSet<StringRef> libs = getExcludeLibs(args); bool all = libs.count("ALL"); @@ -2441,7 +2444,7 @@ static void findKeepUniqueSections(Ctx &ctx, opt::InputArgList &args) { // are used to control which partition a symbol is allocated to. See // https://lld.llvm.org/Partitions.html for more details on partitions. template <class ELFT> -static void readSymbolPartitionSection(InputSectionBase *s) { +static void readSymbolPartitionSection(Ctx &ctx, InputSectionBase *s) { // Read the relocation that refers to the partition's entry point symbol. Symbol *sym; const RelsOrRelas<ELFT> rels = s->template relsOrRelas<ELFT>(); @@ -2961,7 +2964,7 @@ template <class ELFT> void LinkerDriver::link(opt::InputArgList &args) { // 'has undefined version' error in -shared --exclude-libs=ALL mode (PR36295). // GNU ld errors in this case. if (args.hasArg(OPT_exclude_libs)) - excludeLibs(args); + excludeLibs(ctx, args); // Create elfHeader early. We need a dummy section in // addReservedSymbols to mark the created symbols as not absolute. @@ -2994,7 +2997,7 @@ template <class ELFT> void LinkerDriver::link(opt::InputArgList &args) { // Handle --lto-validate-all-vtables-have-type-infos. if (ctx.arg.ltoValidateAllVtablesHaveTypeInfos) - ltoValidateAllVtablesHaveTypeInfos<ELFT>(args); + ltoValidateAllVtablesHaveTypeInfos<ELFT>(ctx, args); // Do link-time optimization if given files are LLVM bitcode files. // This compiles bitcode files into real object files. @@ -3045,7 +3048,7 @@ template <class ELFT> void LinkerDriver::link(opt::InputArgList &args) { // libcalls symbols defined in an excluded archive. This may override // versionId set by scanVersionScript(). if (args.hasArg(OPT_exclude_libs)) - excludeLibs(args); + excludeLibs(ctx, args); // Record [__acle_se_<sym>, <sym>] pairs for later processing.
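// Background for the CMSE step that follows (a standalone sketch, assuming an
// Arm v8-M build with -mcmse; not part of the driver): a secure entry
// function is emitted under two names, <sym> and its __acle_se_<sym> twin,
// and the linker records these pairs to build the secure gateway veneers.
//
//   __attribute__((cmse_nonsecure_entry)) int entry(int x) { return x + 1; }
//   // the object file then carries both "entry" and "__acle_se_entry"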
processArmCmseSymbols(); @@ -3079,10 +3082,10 @@ template <class ELFT> void LinkerDriver::link(opt::InputArgList &args) { { llvm::TimeTraceScope timeScope("Strip sections"); if (ctx.hasSympart.load(std::memory_order_relaxed)) { - llvm::erase_if(ctx.inputSections, [](InputSectionBase *s) { + llvm::erase_if(ctx.inputSections, [&ctx = ctx](InputSectionBase *s) { if (s->type != SHT_LLVM_SYMPART) return false; - readSymbolPartitionSection<ELFT>(s); + readSymbolPartitionSection<ELFT>(ctx, s); return true; }); } @@ -3204,8 +3207,8 @@ template <class ELFT> void LinkerDriver::link(opt::InputArgList &args) { if (ctx.arg.callGraphProfileSort != CGProfileSortKind::None) { if (auto *arg = args.getLastArg(OPT_call_graph_ordering_file)) if (std::optional<MemoryBufferRef> buffer = readFile(arg->getValue())) - readCallGraph(*buffer); - readCallGraphsFromObjectFiles<ELFT>(); + readCallGraph(ctx, *buffer); + readCallGraphsFromObjectFiles<ELFT>(ctx); } // Write the result to the file. From 3c348bf5435896bea70f613d9bdcc542201075b4 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Wed, 25 Sep 2024 10:29:44 -0700 Subject: [PATCH 199/212] [RISCV] Fold (fmv_x_h/w (load)) to an integer load. (#109900) --- llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 11 + .../CodeGen/RISCV/fastcc-without-f-reg.ll | 620 ++++---- llvm/test/CodeGen/RISCV/half-arith.ll | 30 +- .../rvv/fixed-vectors-fp-buildvec-bf16.ll | 6 +- .../RISCV/rvv/fixed-vectors-fp-buildvec.ll | 6 +- .../CodeGen/RISCV/rvv/fixed-vectors-fp.ll | 1408 +++++++---------- 6 files changed, 911 insertions(+), 1170 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 7b00b2514c4ef..56c9ba67bb35e 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -16984,6 +16984,17 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N, return Op0.getOperand(0); } + if (ISD::isNormalLoad(Op0.getNode()) && Op0.hasOneUse() && + cast<LoadSDNode>(Op0)->isSimple()) { + MVT IVT = MVT::getIntegerVT(Op0.getValueSizeInBits()); + auto *LN0 = cast<LoadSDNode>(Op0); + SDValue Load = + DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT, LN0->getChain(), + LN0->getBasePtr(), IVT, LN0->getMemOperand()); + DAG.ReplaceAllUsesOfValueWith(Op0.getValue(1), Load.getValue(1)); + return Load; + } + // This is a target-specific version of a DAGCombine performed in // DAGCombiner::visitBITCAST.
It performs the equivalent of: // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit) diff --git a/llvm/test/CodeGen/RISCV/fastcc-without-f-reg.ll b/llvm/test/CodeGen/RISCV/fastcc-without-f-reg.ll index 8e2fdfc4ba94c..ca40ba0399973 100644 --- a/llvm/test/CodeGen/RISCV/fastcc-without-f-reg.ll +++ b/llvm/test/CodeGen/RISCV/fastcc-without-f-reg.ll @@ -246,32 +246,28 @@ define fastcc half @callee_half_32(<32 x half> %A) nounwind { define half @caller_half_32(<32 x half> %A) nounwind { ; ZHINX32-LABEL: caller_half_32: ; ZHINX32: # %bb.0: -; ZHINX32-NEXT: addi sp, sp, -112 -; ZHINX32-NEXT: sw ra, 108(sp) # 4-byte Folded Spill -; ZHINX32-NEXT: sw s0, 104(sp) # 4-byte Folded Spill -; ZHINX32-NEXT: sw s1, 100(sp) # 4-byte Folded Spill -; ZHINX32-NEXT: sw s2, 96(sp) # 4-byte Folded Spill -; ZHINX32-NEXT: sw s3, 92(sp) # 4-byte Folded Spill -; ZHINX32-NEXT: sw s4, 88(sp) # 4-byte Folded Spill -; ZHINX32-NEXT: sw s5, 84(sp) # 4-byte Folded Spill -; ZHINX32-NEXT: sw s6, 80(sp) # 4-byte Folded Spill -; ZHINX32-NEXT: sw s7, 76(sp) # 4-byte Folded Spill -; ZHINX32-NEXT: sw s8, 72(sp) # 4-byte Folded Spill -; ZHINX32-NEXT: sw s9, 68(sp) # 4-byte Folded Spill -; ZHINX32-NEXT: sw s10, 64(sp) # 4-byte Folded Spill -; ZHINX32-NEXT: sw s11, 60(sp) # 4-byte Folded Spill -; ZHINX32-NEXT: lh t0, 124(sp) -; ZHINX32-NEXT: sw t0, 56(sp) # 4-byte Folded Spill -; ZHINX32-NEXT: lh t0, 120(sp) -; ZHINX32-NEXT: sw t0, 52(sp) # 4-byte Folded Spill -; ZHINX32-NEXT: lh t0, 116(sp) -; ZHINX32-NEXT: sw t0, 48(sp) # 4-byte Folded Spill +; ZHINX32-NEXT: addi sp, sp, -96 +; ZHINX32-NEXT: sw ra, 92(sp) # 4-byte Folded Spill +; ZHINX32-NEXT: sw s0, 88(sp) # 4-byte Folded Spill +; ZHINX32-NEXT: sw s1, 84(sp) # 4-byte Folded Spill +; ZHINX32-NEXT: sw s2, 80(sp) # 4-byte Folded Spill +; ZHINX32-NEXT: sw s3, 76(sp) # 4-byte Folded Spill +; ZHINX32-NEXT: sw s4, 72(sp) # 4-byte Folded Spill +; ZHINX32-NEXT: sw s5, 68(sp) # 4-byte Folded Spill +; ZHINX32-NEXT: sw s6, 64(sp) # 4-byte Folded Spill +; ZHINX32-NEXT: sw s7, 60(sp) # 4-byte Folded Spill +; ZHINX32-NEXT: sw s8, 56(sp) # 4-byte Folded Spill +; ZHINX32-NEXT: sw s9, 52(sp) # 4-byte Folded Spill +; ZHINX32-NEXT: sw s10, 48(sp) # 4-byte Folded Spill +; ZHINX32-NEXT: sw s11, 44(sp) # 4-byte Folded Spill ; ZHINX32-NEXT: lh t0, 112(sp) -; ZHINX32-NEXT: sw t0, 44(sp) # 4-byte Folded Spill -; ZHINX32-NEXT: lh t6, 128(sp) -; ZHINX32-NEXT: lh t5, 132(sp) -; ZHINX32-NEXT: lh t4, 136(sp) -; ZHINX32-NEXT: lh s0, 140(sp) +; ZHINX32-NEXT: lh t1, 116(sp) +; ZHINX32-NEXT: lh t2, 120(sp) +; ZHINX32-NEXT: lh s0, 124(sp) +; ZHINX32-NEXT: lh t3, 128(sp) +; ZHINX32-NEXT: lh t4, 132(sp) +; ZHINX32-NEXT: lh t5, 136(sp) +; ZHINX32-NEXT: lh t6, 140(sp) ; ZHINX32-NEXT: lh s1, 144(sp) ; ZHINX32-NEXT: lh s2, 148(sp) ; ZHINX32-NEXT: lh s3, 152(sp) @@ -284,79 +280,71 @@ define half @caller_half_32(<32 x half> %A) nounwind { ; ZHINX32-NEXT: lh s10, 180(sp) ; ZHINX32-NEXT: lh s11, 184(sp) ; ZHINX32-NEXT: lh ra, 188(sp) -; ZHINX32-NEXT: lh t3, 192(sp) -; ZHINX32-NEXT: lh t2, 196(sp) -; ZHINX32-NEXT: lh t1, 200(sp) -; ZHINX32-NEXT: lh t0, 204(sp) -; ZHINX32-NEXT: sh t0, 38(sp) -; ZHINX32-NEXT: sh t1, 36(sp) -; ZHINX32-NEXT: sh t2, 34(sp) -; ZHINX32-NEXT: sh t3, 32(sp) -; ZHINX32-NEXT: sh ra, 30(sp) -; ZHINX32-NEXT: sh s11, 28(sp) -; ZHINX32-NEXT: sh s10, 26(sp) -; ZHINX32-NEXT: sh s9, 24(sp) -; ZHINX32-NEXT: sh s8, 22(sp) -; ZHINX32-NEXT: sh s7, 20(sp) -; ZHINX32-NEXT: sh s6, 18(sp) -; ZHINX32-NEXT: sh s5, 16(sp) -; ZHINX32-NEXT: sh s4, 14(sp) -; ZHINX32-NEXT: sh s3, 12(sp) -; ZHINX32-NEXT: sh s2, 10(sp) -; 
ZHINX32-NEXT: sh s1, 8(sp) +; ZHINX32-NEXT: sh ra, 38(sp) +; ZHINX32-NEXT: sh s11, 36(sp) +; ZHINX32-NEXT: sh s10, 34(sp) +; ZHINX32-NEXT: sh s9, 32(sp) +; ZHINX32-NEXT: sh s8, 30(sp) +; ZHINX32-NEXT: sh s7, 28(sp) +; ZHINX32-NEXT: sh s6, 26(sp) +; ZHINX32-NEXT: sh s5, 24(sp) +; ZHINX32-NEXT: sh s4, 22(sp) +; ZHINX32-NEXT: sh s3, 20(sp) +; ZHINX32-NEXT: sh s2, 18(sp) +; ZHINX32-NEXT: sh s1, 16(sp) +; ZHINX32-NEXT: sh t6, 14(sp) +; ZHINX32-NEXT: sh t5, 12(sp) +; ZHINX32-NEXT: sh t4, 10(sp) +; ZHINX32-NEXT: sh t3, 8(sp) +; ZHINX32-NEXT: lh t3, 96(sp) +; ZHINX32-NEXT: lh t4, 100(sp) +; ZHINX32-NEXT: lh t5, 104(sp) +; ZHINX32-NEXT: lh t6, 108(sp) ; ZHINX32-NEXT: sh s0, 6(sp) -; ZHINX32-NEXT: sh t4, 4(sp) -; ZHINX32-NEXT: sh t5, 2(sp) -; ZHINX32-NEXT: sh t6, 0(sp) -; ZHINX32-NEXT: lw t3, 44(sp) # 4-byte Folded Reload -; ZHINX32-NEXT: lw t4, 48(sp) # 4-byte Folded Reload -; ZHINX32-NEXT: lw t5, 52(sp) # 4-byte Folded Reload -; ZHINX32-NEXT: lw t6, 56(sp) # 4-byte Folded Reload +; ZHINX32-NEXT: sh t2, 4(sp) +; ZHINX32-NEXT: sh t1, 2(sp) +; ZHINX32-NEXT: sh t0, 0(sp) ; ZHINX32-NEXT: call callee_half_32 -; ZHINX32-NEXT: lw ra, 108(sp) # 4-byte Folded Reload -; ZHINX32-NEXT: lw s0, 104(sp) # 4-byte Folded Reload -; ZHINX32-NEXT: lw s1, 100(sp) # 4-byte Folded Reload -; ZHINX32-NEXT: lw s2, 96(sp) # 4-byte Folded Reload -; ZHINX32-NEXT: lw s3, 92(sp) # 4-byte Folded Reload -; ZHINX32-NEXT: lw s4, 88(sp) # 4-byte Folded Reload -; ZHINX32-NEXT: lw s5, 84(sp) # 4-byte Folded Reload -; ZHINX32-NEXT: lw s6, 80(sp) # 4-byte Folded Reload -; ZHINX32-NEXT: lw s7, 76(sp) # 4-byte Folded Reload -; ZHINX32-NEXT: lw s8, 72(sp) # 4-byte Folded Reload -; ZHINX32-NEXT: lw s9, 68(sp) # 4-byte Folded Reload -; ZHINX32-NEXT: lw s10, 64(sp) # 4-byte Folded Reload -; ZHINX32-NEXT: lw s11, 60(sp) # 4-byte Folded Reload -; ZHINX32-NEXT: addi sp, sp, 112 +; ZHINX32-NEXT: lw ra, 92(sp) # 4-byte Folded Reload +; ZHINX32-NEXT: lw s0, 88(sp) # 4-byte Folded Reload +; ZHINX32-NEXT: lw s1, 84(sp) # 4-byte Folded Reload +; ZHINX32-NEXT: lw s2, 80(sp) # 4-byte Folded Reload +; ZHINX32-NEXT: lw s3, 76(sp) # 4-byte Folded Reload +; ZHINX32-NEXT: lw s4, 72(sp) # 4-byte Folded Reload +; ZHINX32-NEXT: lw s5, 68(sp) # 4-byte Folded Reload +; ZHINX32-NEXT: lw s6, 64(sp) # 4-byte Folded Reload +; ZHINX32-NEXT: lw s7, 60(sp) # 4-byte Folded Reload +; ZHINX32-NEXT: lw s8, 56(sp) # 4-byte Folded Reload +; ZHINX32-NEXT: lw s9, 52(sp) # 4-byte Folded Reload +; ZHINX32-NEXT: lw s10, 48(sp) # 4-byte Folded Reload +; ZHINX32-NEXT: lw s11, 44(sp) # 4-byte Folded Reload +; ZHINX32-NEXT: addi sp, sp, 96 ; ZHINX32-NEXT: ret ; ; ZHINX64-LABEL: caller_half_32: ; ZHINX64: # %bb.0: -; ZHINX64-NEXT: addi sp, sp, -176 -; ZHINX64-NEXT: sd ra, 168(sp) # 8-byte Folded Spill -; ZHINX64-NEXT: sd s0, 160(sp) # 8-byte Folded Spill -; ZHINX64-NEXT: sd s1, 152(sp) # 8-byte Folded Spill -; ZHINX64-NEXT: sd s2, 144(sp) # 8-byte Folded Spill -; ZHINX64-NEXT: sd s3, 136(sp) # 8-byte Folded Spill -; ZHINX64-NEXT: sd s4, 128(sp) # 8-byte Folded Spill -; ZHINX64-NEXT: sd s5, 120(sp) # 8-byte Folded Spill -; ZHINX64-NEXT: sd s6, 112(sp) # 8-byte Folded Spill -; ZHINX64-NEXT: sd s7, 104(sp) # 8-byte Folded Spill -; ZHINX64-NEXT: sd s8, 96(sp) # 8-byte Folded Spill -; ZHINX64-NEXT: sd s9, 88(sp) # 8-byte Folded Spill -; ZHINX64-NEXT: sd s10, 80(sp) # 8-byte Folded Spill -; ZHINX64-NEXT: sd s11, 72(sp) # 8-byte Folded Spill -; ZHINX64-NEXT: lh t0, 200(sp) -; ZHINX64-NEXT: sd t0, 64(sp) # 8-byte Folded Spill -; ZHINX64-NEXT: lh t0, 192(sp) -; ZHINX64-NEXT: sd t0, 56(sp) # 
8-byte Folded Spill -; ZHINX64-NEXT: lh t0, 184(sp) -; ZHINX64-NEXT: sd t0, 48(sp) # 8-byte Folded Spill +; ZHINX64-NEXT: addi sp, sp, -144 +; ZHINX64-NEXT: sd ra, 136(sp) # 8-byte Folded Spill +; ZHINX64-NEXT: sd s0, 128(sp) # 8-byte Folded Spill +; ZHINX64-NEXT: sd s1, 120(sp) # 8-byte Folded Spill +; ZHINX64-NEXT: sd s2, 112(sp) # 8-byte Folded Spill +; ZHINX64-NEXT: sd s3, 104(sp) # 8-byte Folded Spill +; ZHINX64-NEXT: sd s4, 96(sp) # 8-byte Folded Spill +; ZHINX64-NEXT: sd s5, 88(sp) # 8-byte Folded Spill +; ZHINX64-NEXT: sd s6, 80(sp) # 8-byte Folded Spill +; ZHINX64-NEXT: sd s7, 72(sp) # 8-byte Folded Spill +; ZHINX64-NEXT: sd s8, 64(sp) # 8-byte Folded Spill +; ZHINX64-NEXT: sd s9, 56(sp) # 8-byte Folded Spill +; ZHINX64-NEXT: sd s10, 48(sp) # 8-byte Folded Spill +; ZHINX64-NEXT: sd s11, 40(sp) # 8-byte Folded Spill ; ZHINX64-NEXT: lh t0, 176(sp) -; ZHINX64-NEXT: sd t0, 40(sp) # 8-byte Folded Spill -; ZHINX64-NEXT: lh t6, 208(sp) -; ZHINX64-NEXT: lh t5, 216(sp) -; ZHINX64-NEXT: lh t4, 224(sp) -; ZHINX64-NEXT: lh s0, 232(sp) +; ZHINX64-NEXT: lh t1, 184(sp) +; ZHINX64-NEXT: lh t2, 192(sp) +; ZHINX64-NEXT: lh s0, 200(sp) +; ZHINX64-NEXT: lh t3, 208(sp) +; ZHINX64-NEXT: lh t4, 216(sp) +; ZHINX64-NEXT: lh t5, 224(sp) +; ZHINX64-NEXT: lh t6, 232(sp) ; ZHINX64-NEXT: lh s1, 240(sp) ; ZHINX64-NEXT: lh s2, 248(sp) ; ZHINX64-NEXT: lh s3, 256(sp) @@ -369,49 +357,45 @@ define half @caller_half_32(<32 x half> %A) nounwind { ; ZHINX64-NEXT: lh s10, 312(sp) ; ZHINX64-NEXT: lh s11, 320(sp) ; ZHINX64-NEXT: lh ra, 328(sp) -; ZHINX64-NEXT: lh t3, 336(sp) -; ZHINX64-NEXT: lh t2, 344(sp) -; ZHINX64-NEXT: lh t1, 352(sp) -; ZHINX64-NEXT: lh t0, 360(sp) -; ZHINX64-NEXT: sh t0, 38(sp) -; ZHINX64-NEXT: sh t1, 36(sp) -; ZHINX64-NEXT: sh t2, 34(sp) -; ZHINX64-NEXT: sh t3, 32(sp) -; ZHINX64-NEXT: sh ra, 30(sp) -; ZHINX64-NEXT: sh s11, 28(sp) -; ZHINX64-NEXT: sh s10, 26(sp) -; ZHINX64-NEXT: sh s9, 24(sp) -; ZHINX64-NEXT: sh s8, 22(sp) -; ZHINX64-NEXT: sh s7, 20(sp) -; ZHINX64-NEXT: sh s6, 18(sp) -; ZHINX64-NEXT: sh s5, 16(sp) -; ZHINX64-NEXT: sh s4, 14(sp) -; ZHINX64-NEXT: sh s3, 12(sp) -; ZHINX64-NEXT: sh s2, 10(sp) -; ZHINX64-NEXT: sh s1, 8(sp) +; ZHINX64-NEXT: sh ra, 38(sp) +; ZHINX64-NEXT: sh s11, 36(sp) +; ZHINX64-NEXT: sh s10, 34(sp) +; ZHINX64-NEXT: sh s9, 32(sp) +; ZHINX64-NEXT: sh s8, 30(sp) +; ZHINX64-NEXT: sh s7, 28(sp) +; ZHINX64-NEXT: sh s6, 26(sp) +; ZHINX64-NEXT: sh s5, 24(sp) +; ZHINX64-NEXT: sh s4, 22(sp) +; ZHINX64-NEXT: sh s3, 20(sp) +; ZHINX64-NEXT: sh s2, 18(sp) +; ZHINX64-NEXT: sh s1, 16(sp) +; ZHINX64-NEXT: sh t6, 14(sp) +; ZHINX64-NEXT: sh t5, 12(sp) +; ZHINX64-NEXT: sh t4, 10(sp) +; ZHINX64-NEXT: sh t3, 8(sp) +; ZHINX64-NEXT: lh t3, 144(sp) +; ZHINX64-NEXT: lh t4, 152(sp) +; ZHINX64-NEXT: lh t5, 160(sp) +; ZHINX64-NEXT: lh t6, 168(sp) ; ZHINX64-NEXT: sh s0, 6(sp) -; ZHINX64-NEXT: sh t4, 4(sp) -; ZHINX64-NEXT: sh t5, 2(sp) -; ZHINX64-NEXT: sh t6, 0(sp) -; ZHINX64-NEXT: ld t3, 40(sp) # 8-byte Folded Reload -; ZHINX64-NEXT: ld t4, 48(sp) # 8-byte Folded Reload -; ZHINX64-NEXT: ld t5, 56(sp) # 8-byte Folded Reload -; ZHINX64-NEXT: ld t6, 64(sp) # 8-byte Folded Reload +; ZHINX64-NEXT: sh t2, 4(sp) +; ZHINX64-NEXT: sh t1, 2(sp) +; ZHINX64-NEXT: sh t0, 0(sp) ; ZHINX64-NEXT: call callee_half_32 -; ZHINX64-NEXT: ld ra, 168(sp) # 8-byte Folded Reload -; ZHINX64-NEXT: ld s0, 160(sp) # 8-byte Folded Reload -; ZHINX64-NEXT: ld s1, 152(sp) # 8-byte Folded Reload -; ZHINX64-NEXT: ld s2, 144(sp) # 8-byte Folded Reload -; ZHINX64-NEXT: ld s3, 136(sp) # 8-byte Folded Reload -; ZHINX64-NEXT: ld s4, 128(sp) 
# 8-byte Folded Reload -; ZHINX64-NEXT: ld s5, 120(sp) # 8-byte Folded Reload -; ZHINX64-NEXT: ld s6, 112(sp) # 8-byte Folded Reload -; ZHINX64-NEXT: ld s7, 104(sp) # 8-byte Folded Reload -; ZHINX64-NEXT: ld s8, 96(sp) # 8-byte Folded Reload -; ZHINX64-NEXT: ld s9, 88(sp) # 8-byte Folded Reload -; ZHINX64-NEXT: ld s10, 80(sp) # 8-byte Folded Reload -; ZHINX64-NEXT: ld s11, 72(sp) # 8-byte Folded Reload -; ZHINX64-NEXT: addi sp, sp, 176 +; ZHINX64-NEXT: ld ra, 136(sp) # 8-byte Folded Reload +; ZHINX64-NEXT: ld s0, 128(sp) # 8-byte Folded Reload +; ZHINX64-NEXT: ld s1, 120(sp) # 8-byte Folded Reload +; ZHINX64-NEXT: ld s2, 112(sp) # 8-byte Folded Reload +; ZHINX64-NEXT: ld s3, 104(sp) # 8-byte Folded Reload +; ZHINX64-NEXT: ld s4, 96(sp) # 8-byte Folded Reload +; ZHINX64-NEXT: ld s5, 88(sp) # 8-byte Folded Reload +; ZHINX64-NEXT: ld s6, 80(sp) # 8-byte Folded Reload +; ZHINX64-NEXT: ld s7, 72(sp) # 8-byte Folded Reload +; ZHINX64-NEXT: ld s8, 64(sp) # 8-byte Folded Reload +; ZHINX64-NEXT: ld s9, 56(sp) # 8-byte Folded Reload +; ZHINX64-NEXT: ld s10, 48(sp) # 8-byte Folded Reload +; ZHINX64-NEXT: ld s11, 40(sp) # 8-byte Folded Reload +; ZHINX64-NEXT: addi sp, sp, 144 ; ZHINX64-NEXT: ret ; ; ZFINX32-LABEL: caller_half_32: @@ -917,32 +901,28 @@ define float @caller_float_32(<32 x float> %A) nounwind { ; ; ZHINX64-LABEL: caller_float_32: ; ZHINX64: # %bb.0: -; ZHINX64-NEXT: addi sp, sp, -224 -; ZHINX64-NEXT: sd ra, 216(sp) # 8-byte Folded Spill -; ZHINX64-NEXT: sd s0, 208(sp) # 8-byte Folded Spill -; ZHINX64-NEXT: sd s1, 200(sp) # 8-byte Folded Spill -; ZHINX64-NEXT: sd s2, 192(sp) # 8-byte Folded Spill -; ZHINX64-NEXT: sd s3, 184(sp) # 8-byte Folded Spill -; ZHINX64-NEXT: sd s4, 176(sp) # 8-byte Folded Spill -; ZHINX64-NEXT: sd s5, 168(sp) # 8-byte Folded Spill -; ZHINX64-NEXT: sd s6, 160(sp) # 8-byte Folded Spill -; ZHINX64-NEXT: sd s7, 152(sp) # 8-byte Folded Spill -; ZHINX64-NEXT: sd s8, 144(sp) # 8-byte Folded Spill -; ZHINX64-NEXT: sd s9, 136(sp) # 8-byte Folded Spill -; ZHINX64-NEXT: sd s10, 128(sp) # 8-byte Folded Spill -; ZHINX64-NEXT: sd s11, 120(sp) # 8-byte Folded Spill -; ZHINX64-NEXT: lw t0, 248(sp) -; ZHINX64-NEXT: sd t0, 112(sp) # 8-byte Folded Spill -; ZHINX64-NEXT: lw t0, 240(sp) -; ZHINX64-NEXT: sd t0, 104(sp) # 8-byte Folded Spill -; ZHINX64-NEXT: lw t0, 232(sp) -; ZHINX64-NEXT: sd t0, 96(sp) # 8-byte Folded Spill +; ZHINX64-NEXT: addi sp, sp, -192 +; ZHINX64-NEXT: sd ra, 184(sp) # 8-byte Folded Spill +; ZHINX64-NEXT: sd s0, 176(sp) # 8-byte Folded Spill +; ZHINX64-NEXT: sd s1, 168(sp) # 8-byte Folded Spill +; ZHINX64-NEXT: sd s2, 160(sp) # 8-byte Folded Spill +; ZHINX64-NEXT: sd s3, 152(sp) # 8-byte Folded Spill +; ZHINX64-NEXT: sd s4, 144(sp) # 8-byte Folded Spill +; ZHINX64-NEXT: sd s5, 136(sp) # 8-byte Folded Spill +; ZHINX64-NEXT: sd s6, 128(sp) # 8-byte Folded Spill +; ZHINX64-NEXT: sd s7, 120(sp) # 8-byte Folded Spill +; ZHINX64-NEXT: sd s8, 112(sp) # 8-byte Folded Spill +; ZHINX64-NEXT: sd s9, 104(sp) # 8-byte Folded Spill +; ZHINX64-NEXT: sd s10, 96(sp) # 8-byte Folded Spill +; ZHINX64-NEXT: sd s11, 88(sp) # 8-byte Folded Spill ; ZHINX64-NEXT: lw t0, 224(sp) -; ZHINX64-NEXT: sd t0, 88(sp) # 8-byte Folded Spill -; ZHINX64-NEXT: lw t6, 256(sp) -; ZHINX64-NEXT: lw t5, 264(sp) -; ZHINX64-NEXT: lw t4, 272(sp) -; ZHINX64-NEXT: lw s0, 280(sp) +; ZHINX64-NEXT: lw t1, 232(sp) +; ZHINX64-NEXT: lw t2, 240(sp) +; ZHINX64-NEXT: lw s0, 248(sp) +; ZHINX64-NEXT: lw t3, 256(sp) +; ZHINX64-NEXT: lw t4, 264(sp) +; ZHINX64-NEXT: lw t5, 272(sp) +; ZHINX64-NEXT: lw t6, 280(sp) ; 
ZHINX64-NEXT: lw s1, 288(sp) ; ZHINX64-NEXT: lw s2, 296(sp) ; ZHINX64-NEXT: lw s3, 304(sp) @@ -955,49 +935,45 @@ define float @caller_float_32(<32 x float> %A) nounwind { ; ZHINX64-NEXT: lw s10, 360(sp) ; ZHINX64-NEXT: lw s11, 368(sp) ; ZHINX64-NEXT: lw ra, 376(sp) -; ZHINX64-NEXT: lw t3, 384(sp) -; ZHINX64-NEXT: lw t2, 392(sp) -; ZHINX64-NEXT: lw t1, 400(sp) -; ZHINX64-NEXT: lw t0, 408(sp) -; ZHINX64-NEXT: sw t0, 76(sp) -; ZHINX64-NEXT: sw t1, 72(sp) -; ZHINX64-NEXT: sw t2, 68(sp) -; ZHINX64-NEXT: sw t3, 64(sp) -; ZHINX64-NEXT: sw ra, 60(sp) -; ZHINX64-NEXT: sw s11, 56(sp) -; ZHINX64-NEXT: sw s10, 52(sp) -; ZHINX64-NEXT: sw s9, 48(sp) -; ZHINX64-NEXT: sw s8, 44(sp) -; ZHINX64-NEXT: sw s7, 40(sp) -; ZHINX64-NEXT: sw s6, 36(sp) -; ZHINX64-NEXT: sw s5, 32(sp) -; ZHINX64-NEXT: sw s4, 28(sp) -; ZHINX64-NEXT: sw s3, 24(sp) -; ZHINX64-NEXT: sw s2, 20(sp) -; ZHINX64-NEXT: sw s1, 16(sp) +; ZHINX64-NEXT: sw ra, 76(sp) +; ZHINX64-NEXT: sw s11, 72(sp) +; ZHINX64-NEXT: sw s10, 68(sp) +; ZHINX64-NEXT: sw s9, 64(sp) +; ZHINX64-NEXT: sw s8, 60(sp) +; ZHINX64-NEXT: sw s7, 56(sp) +; ZHINX64-NEXT: sw s6, 52(sp) +; ZHINX64-NEXT: sw s5, 48(sp) +; ZHINX64-NEXT: sw s4, 44(sp) +; ZHINX64-NEXT: sw s3, 40(sp) +; ZHINX64-NEXT: sw s2, 36(sp) +; ZHINX64-NEXT: sw s1, 32(sp) +; ZHINX64-NEXT: sw t6, 28(sp) +; ZHINX64-NEXT: sw t5, 24(sp) +; ZHINX64-NEXT: sw t4, 20(sp) +; ZHINX64-NEXT: sw t3, 16(sp) +; ZHINX64-NEXT: lw t3, 192(sp) +; ZHINX64-NEXT: lw t4, 200(sp) +; ZHINX64-NEXT: lw t5, 208(sp) +; ZHINX64-NEXT: lw t6, 216(sp) ; ZHINX64-NEXT: sw s0, 12(sp) -; ZHINX64-NEXT: sw t4, 8(sp) -; ZHINX64-NEXT: sw t5, 4(sp) -; ZHINX64-NEXT: sw t6, 0(sp) -; ZHINX64-NEXT: ld t3, 88(sp) # 8-byte Folded Reload -; ZHINX64-NEXT: ld t4, 96(sp) # 8-byte Folded Reload -; ZHINX64-NEXT: ld t5, 104(sp) # 8-byte Folded Reload -; ZHINX64-NEXT: ld t6, 112(sp) # 8-byte Folded Reload +; ZHINX64-NEXT: sw t2, 8(sp) +; ZHINX64-NEXT: sw t1, 4(sp) +; ZHINX64-NEXT: sw t0, 0(sp) ; ZHINX64-NEXT: call callee_float_32 -; ZHINX64-NEXT: ld ra, 216(sp) # 8-byte Folded Reload -; ZHINX64-NEXT: ld s0, 208(sp) # 8-byte Folded Reload -; ZHINX64-NEXT: ld s1, 200(sp) # 8-byte Folded Reload -; ZHINX64-NEXT: ld s2, 192(sp) # 8-byte Folded Reload -; ZHINX64-NEXT: ld s3, 184(sp) # 8-byte Folded Reload -; ZHINX64-NEXT: ld s4, 176(sp) # 8-byte Folded Reload -; ZHINX64-NEXT: ld s5, 168(sp) # 8-byte Folded Reload -; ZHINX64-NEXT: ld s6, 160(sp) # 8-byte Folded Reload -; ZHINX64-NEXT: ld s7, 152(sp) # 8-byte Folded Reload -; ZHINX64-NEXT: ld s8, 144(sp) # 8-byte Folded Reload -; ZHINX64-NEXT: ld s9, 136(sp) # 8-byte Folded Reload -; ZHINX64-NEXT: ld s10, 128(sp) # 8-byte Folded Reload -; ZHINX64-NEXT: ld s11, 120(sp) # 8-byte Folded Reload -; ZHINX64-NEXT: addi sp, sp, 224 +; ZHINX64-NEXT: ld ra, 184(sp) # 8-byte Folded Reload +; ZHINX64-NEXT: ld s0, 176(sp) # 8-byte Folded Reload +; ZHINX64-NEXT: ld s1, 168(sp) # 8-byte Folded Reload +; ZHINX64-NEXT: ld s2, 160(sp) # 8-byte Folded Reload +; ZHINX64-NEXT: ld s3, 152(sp) # 8-byte Folded Reload +; ZHINX64-NEXT: ld s4, 144(sp) # 8-byte Folded Reload +; ZHINX64-NEXT: ld s5, 136(sp) # 8-byte Folded Reload +; ZHINX64-NEXT: ld s6, 128(sp) # 8-byte Folded Reload +; ZHINX64-NEXT: ld s7, 120(sp) # 8-byte Folded Reload +; ZHINX64-NEXT: ld s8, 112(sp) # 8-byte Folded Reload +; ZHINX64-NEXT: ld s9, 104(sp) # 8-byte Folded Reload +; ZHINX64-NEXT: ld s10, 96(sp) # 8-byte Folded Reload +; ZHINX64-NEXT: ld s11, 88(sp) # 8-byte Folded Reload +; ZHINX64-NEXT: addi sp, sp, 192 ; ZHINX64-NEXT: ret ; ; ZFINX32-LABEL: caller_float_32: @@ -1087,32 
+1063,28 @@ define float @caller_float_32(<32 x float> %A) nounwind { ; ; ZFINX64-LABEL: caller_float_32: ; ZFINX64: # %bb.0: -; ZFINX64-NEXT: addi sp, sp, -224 -; ZFINX64-NEXT: sd ra, 216(sp) # 8-byte Folded Spill -; ZFINX64-NEXT: sd s0, 208(sp) # 8-byte Folded Spill -; ZFINX64-NEXT: sd s1, 200(sp) # 8-byte Folded Spill -; ZFINX64-NEXT: sd s2, 192(sp) # 8-byte Folded Spill -; ZFINX64-NEXT: sd s3, 184(sp) # 8-byte Folded Spill -; ZFINX64-NEXT: sd s4, 176(sp) # 8-byte Folded Spill -; ZFINX64-NEXT: sd s5, 168(sp) # 8-byte Folded Spill -; ZFINX64-NEXT: sd s6, 160(sp) # 8-byte Folded Spill -; ZFINX64-NEXT: sd s7, 152(sp) # 8-byte Folded Spill -; ZFINX64-NEXT: sd s8, 144(sp) # 8-byte Folded Spill -; ZFINX64-NEXT: sd s9, 136(sp) # 8-byte Folded Spill -; ZFINX64-NEXT: sd s10, 128(sp) # 8-byte Folded Spill -; ZFINX64-NEXT: sd s11, 120(sp) # 8-byte Folded Spill -; ZFINX64-NEXT: lw t0, 248(sp) -; ZFINX64-NEXT: sd t0, 112(sp) # 8-byte Folded Spill -; ZFINX64-NEXT: lw t0, 240(sp) -; ZFINX64-NEXT: sd t0, 104(sp) # 8-byte Folded Spill -; ZFINX64-NEXT: lw t0, 232(sp) -; ZFINX64-NEXT: sd t0, 96(sp) # 8-byte Folded Spill +; ZFINX64-NEXT: addi sp, sp, -192 +; ZFINX64-NEXT: sd ra, 184(sp) # 8-byte Folded Spill +; ZFINX64-NEXT: sd s0, 176(sp) # 8-byte Folded Spill +; ZFINX64-NEXT: sd s1, 168(sp) # 8-byte Folded Spill +; ZFINX64-NEXT: sd s2, 160(sp) # 8-byte Folded Spill +; ZFINX64-NEXT: sd s3, 152(sp) # 8-byte Folded Spill +; ZFINX64-NEXT: sd s4, 144(sp) # 8-byte Folded Spill +; ZFINX64-NEXT: sd s5, 136(sp) # 8-byte Folded Spill +; ZFINX64-NEXT: sd s6, 128(sp) # 8-byte Folded Spill +; ZFINX64-NEXT: sd s7, 120(sp) # 8-byte Folded Spill +; ZFINX64-NEXT: sd s8, 112(sp) # 8-byte Folded Spill +; ZFINX64-NEXT: sd s9, 104(sp) # 8-byte Folded Spill +; ZFINX64-NEXT: sd s10, 96(sp) # 8-byte Folded Spill +; ZFINX64-NEXT: sd s11, 88(sp) # 8-byte Folded Spill ; ZFINX64-NEXT: lw t0, 224(sp) -; ZFINX64-NEXT: sd t0, 88(sp) # 8-byte Folded Spill -; ZFINX64-NEXT: lw t6, 256(sp) -; ZFINX64-NEXT: lw t5, 264(sp) -; ZFINX64-NEXT: lw t4, 272(sp) -; ZFINX64-NEXT: lw s0, 280(sp) +; ZFINX64-NEXT: lw t1, 232(sp) +; ZFINX64-NEXT: lw t2, 240(sp) +; ZFINX64-NEXT: lw s0, 248(sp) +; ZFINX64-NEXT: lw t3, 256(sp) +; ZFINX64-NEXT: lw t4, 264(sp) +; ZFINX64-NEXT: lw t5, 272(sp) +; ZFINX64-NEXT: lw t6, 280(sp) ; ZFINX64-NEXT: lw s1, 288(sp) ; ZFINX64-NEXT: lw s2, 296(sp) ; ZFINX64-NEXT: lw s3, 304(sp) @@ -1125,49 +1097,45 @@ define float @caller_float_32(<32 x float> %A) nounwind { ; ZFINX64-NEXT: lw s10, 360(sp) ; ZFINX64-NEXT: lw s11, 368(sp) ; ZFINX64-NEXT: lw ra, 376(sp) -; ZFINX64-NEXT: lw t3, 384(sp) -; ZFINX64-NEXT: lw t2, 392(sp) -; ZFINX64-NEXT: lw t1, 400(sp) -; ZFINX64-NEXT: lw t0, 408(sp) -; ZFINX64-NEXT: sw t0, 76(sp) -; ZFINX64-NEXT: sw t1, 72(sp) -; ZFINX64-NEXT: sw t2, 68(sp) -; ZFINX64-NEXT: sw t3, 64(sp) -; ZFINX64-NEXT: sw ra, 60(sp) -; ZFINX64-NEXT: sw s11, 56(sp) -; ZFINX64-NEXT: sw s10, 52(sp) -; ZFINX64-NEXT: sw s9, 48(sp) -; ZFINX64-NEXT: sw s8, 44(sp) -; ZFINX64-NEXT: sw s7, 40(sp) -; ZFINX64-NEXT: sw s6, 36(sp) -; ZFINX64-NEXT: sw s5, 32(sp) -; ZFINX64-NEXT: sw s4, 28(sp) -; ZFINX64-NEXT: sw s3, 24(sp) -; ZFINX64-NEXT: sw s2, 20(sp) -; ZFINX64-NEXT: sw s1, 16(sp) +; ZFINX64-NEXT: sw ra, 76(sp) +; ZFINX64-NEXT: sw s11, 72(sp) +; ZFINX64-NEXT: sw s10, 68(sp) +; ZFINX64-NEXT: sw s9, 64(sp) +; ZFINX64-NEXT: sw s8, 60(sp) +; ZFINX64-NEXT: sw s7, 56(sp) +; ZFINX64-NEXT: sw s6, 52(sp) +; ZFINX64-NEXT: sw s5, 48(sp) +; ZFINX64-NEXT: sw s4, 44(sp) +; ZFINX64-NEXT: sw s3, 40(sp) +; ZFINX64-NEXT: sw s2, 36(sp) +; ZFINX64-NEXT: sw 
s1, 32(sp) +; ZFINX64-NEXT: sw t6, 28(sp) +; ZFINX64-NEXT: sw t5, 24(sp) +; ZFINX64-NEXT: sw t4, 20(sp) +; ZFINX64-NEXT: sw t3, 16(sp) +; ZFINX64-NEXT: lw t3, 192(sp) +; ZFINX64-NEXT: lw t4, 200(sp) +; ZFINX64-NEXT: lw t5, 208(sp) +; ZFINX64-NEXT: lw t6, 216(sp) ; ZFINX64-NEXT: sw s0, 12(sp) -; ZFINX64-NEXT: sw t4, 8(sp) -; ZFINX64-NEXT: sw t5, 4(sp) -; ZFINX64-NEXT: sw t6, 0(sp) -; ZFINX64-NEXT: ld t3, 88(sp) # 8-byte Folded Reload -; ZFINX64-NEXT: ld t4, 96(sp) # 8-byte Folded Reload -; ZFINX64-NEXT: ld t5, 104(sp) # 8-byte Folded Reload -; ZFINX64-NEXT: ld t6, 112(sp) # 8-byte Folded Reload +; ZFINX64-NEXT: sw t2, 8(sp) +; ZFINX64-NEXT: sw t1, 4(sp) +; ZFINX64-NEXT: sw t0, 0(sp) ; ZFINX64-NEXT: call callee_float_32 -; ZFINX64-NEXT: ld ra, 216(sp) # 8-byte Folded Reload -; ZFINX64-NEXT: ld s0, 208(sp) # 8-byte Folded Reload -; ZFINX64-NEXT: ld s1, 200(sp) # 8-byte Folded Reload -; ZFINX64-NEXT: ld s2, 192(sp) # 8-byte Folded Reload -; ZFINX64-NEXT: ld s3, 184(sp) # 8-byte Folded Reload -; ZFINX64-NEXT: ld s4, 176(sp) # 8-byte Folded Reload -; ZFINX64-NEXT: ld s5, 168(sp) # 8-byte Folded Reload -; ZFINX64-NEXT: ld s6, 160(sp) # 8-byte Folded Reload -; ZFINX64-NEXT: ld s7, 152(sp) # 8-byte Folded Reload -; ZFINX64-NEXT: ld s8, 144(sp) # 8-byte Folded Reload -; ZFINX64-NEXT: ld s9, 136(sp) # 8-byte Folded Reload -; ZFINX64-NEXT: ld s10, 128(sp) # 8-byte Folded Reload -; ZFINX64-NEXT: ld s11, 120(sp) # 8-byte Folded Reload -; ZFINX64-NEXT: addi sp, sp, 224 +; ZFINX64-NEXT: ld ra, 184(sp) # 8-byte Folded Reload +; ZFINX64-NEXT: ld s0, 176(sp) # 8-byte Folded Reload +; ZFINX64-NEXT: ld s1, 168(sp) # 8-byte Folded Reload +; ZFINX64-NEXT: ld s2, 160(sp) # 8-byte Folded Reload +; ZFINX64-NEXT: ld s3, 152(sp) # 8-byte Folded Reload +; ZFINX64-NEXT: ld s4, 144(sp) # 8-byte Folded Reload +; ZFINX64-NEXT: ld s5, 136(sp) # 8-byte Folded Reload +; ZFINX64-NEXT: ld s6, 128(sp) # 8-byte Folded Reload +; ZFINX64-NEXT: ld s7, 120(sp) # 8-byte Folded Reload +; ZFINX64-NEXT: ld s8, 112(sp) # 8-byte Folded Reload +; ZFINX64-NEXT: ld s9, 104(sp) # 8-byte Folded Reload +; ZFINX64-NEXT: ld s10, 96(sp) # 8-byte Folded Reload +; ZFINX64-NEXT: ld s11, 88(sp) # 8-byte Folded Reload +; ZFINX64-NEXT: addi sp, sp, 192 ; ZFINX64-NEXT: ret ; ; ZDINX32-LABEL: caller_float_32: @@ -1257,32 +1225,28 @@ define float @caller_float_32(<32 x float> %A) nounwind { ; ; ZDINX64-LABEL: caller_float_32: ; ZDINX64: # %bb.0: -; ZDINX64-NEXT: addi sp, sp, -224 -; ZDINX64-NEXT: sd ra, 216(sp) # 8-byte Folded Spill -; ZDINX64-NEXT: sd s0, 208(sp) # 8-byte Folded Spill -; ZDINX64-NEXT: sd s1, 200(sp) # 8-byte Folded Spill -; ZDINX64-NEXT: sd s2, 192(sp) # 8-byte Folded Spill -; ZDINX64-NEXT: sd s3, 184(sp) # 8-byte Folded Spill -; ZDINX64-NEXT: sd s4, 176(sp) # 8-byte Folded Spill -; ZDINX64-NEXT: sd s5, 168(sp) # 8-byte Folded Spill -; ZDINX64-NEXT: sd s6, 160(sp) # 8-byte Folded Spill -; ZDINX64-NEXT: sd s7, 152(sp) # 8-byte Folded Spill -; ZDINX64-NEXT: sd s8, 144(sp) # 8-byte Folded Spill -; ZDINX64-NEXT: sd s9, 136(sp) # 8-byte Folded Spill -; ZDINX64-NEXT: sd s10, 128(sp) # 8-byte Folded Spill -; ZDINX64-NEXT: sd s11, 120(sp) # 8-byte Folded Spill -; ZDINX64-NEXT: lw t0, 248(sp) -; ZDINX64-NEXT: sd t0, 112(sp) # 8-byte Folded Spill -; ZDINX64-NEXT: lw t0, 240(sp) -; ZDINX64-NEXT: sd t0, 104(sp) # 8-byte Folded Spill -; ZDINX64-NEXT: lw t0, 232(sp) -; ZDINX64-NEXT: sd t0, 96(sp) # 8-byte Folded Spill +; ZDINX64-NEXT: addi sp, sp, -192 +; ZDINX64-NEXT: sd ra, 184(sp) # 8-byte Folded Spill +; ZDINX64-NEXT: sd s0, 176(sp) # 8-byte Folded 
Spill +; ZDINX64-NEXT: sd s1, 168(sp) # 8-byte Folded Spill +; ZDINX64-NEXT: sd s2, 160(sp) # 8-byte Folded Spill +; ZDINX64-NEXT: sd s3, 152(sp) # 8-byte Folded Spill +; ZDINX64-NEXT: sd s4, 144(sp) # 8-byte Folded Spill +; ZDINX64-NEXT: sd s5, 136(sp) # 8-byte Folded Spill +; ZDINX64-NEXT: sd s6, 128(sp) # 8-byte Folded Spill +; ZDINX64-NEXT: sd s7, 120(sp) # 8-byte Folded Spill +; ZDINX64-NEXT: sd s8, 112(sp) # 8-byte Folded Spill +; ZDINX64-NEXT: sd s9, 104(sp) # 8-byte Folded Spill +; ZDINX64-NEXT: sd s10, 96(sp) # 8-byte Folded Spill +; ZDINX64-NEXT: sd s11, 88(sp) # 8-byte Folded Spill ; ZDINX64-NEXT: lw t0, 224(sp) -; ZDINX64-NEXT: sd t0, 88(sp) # 8-byte Folded Spill -; ZDINX64-NEXT: lw t6, 256(sp) -; ZDINX64-NEXT: lw t5, 264(sp) -; ZDINX64-NEXT: lw t4, 272(sp) -; ZDINX64-NEXT: lw s0, 280(sp) +; ZDINX64-NEXT: lw t1, 232(sp) +; ZDINX64-NEXT: lw t2, 240(sp) +; ZDINX64-NEXT: lw s0, 248(sp) +; ZDINX64-NEXT: lw t3, 256(sp) +; ZDINX64-NEXT: lw t4, 264(sp) +; ZDINX64-NEXT: lw t5, 272(sp) +; ZDINX64-NEXT: lw t6, 280(sp) ; ZDINX64-NEXT: lw s1, 288(sp) ; ZDINX64-NEXT: lw s2, 296(sp) ; ZDINX64-NEXT: lw s3, 304(sp) @@ -1295,49 +1259,45 @@ define float @caller_float_32(<32 x float> %A) nounwind { ; ZDINX64-NEXT: lw s10, 360(sp) ; ZDINX64-NEXT: lw s11, 368(sp) ; ZDINX64-NEXT: lw ra, 376(sp) -; ZDINX64-NEXT: lw t3, 384(sp) -; ZDINX64-NEXT: lw t2, 392(sp) -; ZDINX64-NEXT: lw t1, 400(sp) -; ZDINX64-NEXT: lw t0, 408(sp) -; ZDINX64-NEXT: sw t0, 76(sp) -; ZDINX64-NEXT: sw t1, 72(sp) -; ZDINX64-NEXT: sw t2, 68(sp) -; ZDINX64-NEXT: sw t3, 64(sp) -; ZDINX64-NEXT: sw ra, 60(sp) -; ZDINX64-NEXT: sw s11, 56(sp) -; ZDINX64-NEXT: sw s10, 52(sp) -; ZDINX64-NEXT: sw s9, 48(sp) -; ZDINX64-NEXT: sw s8, 44(sp) -; ZDINX64-NEXT: sw s7, 40(sp) -; ZDINX64-NEXT: sw s6, 36(sp) -; ZDINX64-NEXT: sw s5, 32(sp) -; ZDINX64-NEXT: sw s4, 28(sp) -; ZDINX64-NEXT: sw s3, 24(sp) -; ZDINX64-NEXT: sw s2, 20(sp) -; ZDINX64-NEXT: sw s1, 16(sp) +; ZDINX64-NEXT: sw ra, 76(sp) +; ZDINX64-NEXT: sw s11, 72(sp) +; ZDINX64-NEXT: sw s10, 68(sp) +; ZDINX64-NEXT: sw s9, 64(sp) +; ZDINX64-NEXT: sw s8, 60(sp) +; ZDINX64-NEXT: sw s7, 56(sp) +; ZDINX64-NEXT: sw s6, 52(sp) +; ZDINX64-NEXT: sw s5, 48(sp) +; ZDINX64-NEXT: sw s4, 44(sp) +; ZDINX64-NEXT: sw s3, 40(sp) +; ZDINX64-NEXT: sw s2, 36(sp) +; ZDINX64-NEXT: sw s1, 32(sp) +; ZDINX64-NEXT: sw t6, 28(sp) +; ZDINX64-NEXT: sw t5, 24(sp) +; ZDINX64-NEXT: sw t4, 20(sp) +; ZDINX64-NEXT: sw t3, 16(sp) +; ZDINX64-NEXT: lw t3, 192(sp) +; ZDINX64-NEXT: lw t4, 200(sp) +; ZDINX64-NEXT: lw t5, 208(sp) +; ZDINX64-NEXT: lw t6, 216(sp) ; ZDINX64-NEXT: sw s0, 12(sp) -; ZDINX64-NEXT: sw t4, 8(sp) -; ZDINX64-NEXT: sw t5, 4(sp) -; ZDINX64-NEXT: sw t6, 0(sp) -; ZDINX64-NEXT: ld t3, 88(sp) # 8-byte Folded Reload -; ZDINX64-NEXT: ld t4, 96(sp) # 8-byte Folded Reload -; ZDINX64-NEXT: ld t5, 104(sp) # 8-byte Folded Reload -; ZDINX64-NEXT: ld t6, 112(sp) # 8-byte Folded Reload +; ZDINX64-NEXT: sw t2, 8(sp) +; ZDINX64-NEXT: sw t1, 4(sp) +; ZDINX64-NEXT: sw t0, 0(sp) ; ZDINX64-NEXT: call callee_float_32 -; ZDINX64-NEXT: ld ra, 216(sp) # 8-byte Folded Reload -; ZDINX64-NEXT: ld s0, 208(sp) # 8-byte Folded Reload -; ZDINX64-NEXT: ld s1, 200(sp) # 8-byte Folded Reload -; ZDINX64-NEXT: ld s2, 192(sp) # 8-byte Folded Reload -; ZDINX64-NEXT: ld s3, 184(sp) # 8-byte Folded Reload -; ZDINX64-NEXT: ld s4, 176(sp) # 8-byte Folded Reload -; ZDINX64-NEXT: ld s5, 168(sp) # 8-byte Folded Reload -; ZDINX64-NEXT: ld s6, 160(sp) # 8-byte Folded Reload -; ZDINX64-NEXT: ld s7, 152(sp) # 8-byte Folded Reload -; ZDINX64-NEXT: ld s8, 144(sp) # 
8-byte Folded Reload -; ZDINX64-NEXT: ld s9, 136(sp) # 8-byte Folded Reload -; ZDINX64-NEXT: ld s10, 128(sp) # 8-byte Folded Reload -; ZDINX64-NEXT: ld s11, 120(sp) # 8-byte Folded Reload -; ZDINX64-NEXT: addi sp, sp, 224 +; ZDINX64-NEXT: ld ra, 184(sp) # 8-byte Folded Reload +; ZDINX64-NEXT: ld s0, 176(sp) # 8-byte Folded Reload +; ZDINX64-NEXT: ld s1, 168(sp) # 8-byte Folded Reload +; ZDINX64-NEXT: ld s2, 160(sp) # 8-byte Folded Reload +; ZDINX64-NEXT: ld s3, 152(sp) # 8-byte Folded Reload +; ZDINX64-NEXT: ld s4, 144(sp) # 8-byte Folded Reload +; ZDINX64-NEXT: ld s5, 136(sp) # 8-byte Folded Reload +; ZDINX64-NEXT: ld s6, 128(sp) # 8-byte Folded Reload +; ZDINX64-NEXT: ld s7, 120(sp) # 8-byte Folded Reload +; ZDINX64-NEXT: ld s8, 112(sp) # 8-byte Folded Reload +; ZDINX64-NEXT: ld s9, 104(sp) # 8-byte Folded Reload +; ZDINX64-NEXT: ld s10, 96(sp) # 8-byte Folded Reload +; ZDINX64-NEXT: ld s11, 88(sp) # 8-byte Folded Reload +; ZDINX64-NEXT: addi sp, sp, 192 ; ZDINX64-NEXT: ret %C = call fastcc float @callee_float_32(<32 x float> %A) ret float %C diff --git a/llvm/test/CodeGen/RISCV/half-arith.ll b/llvm/test/CodeGen/RISCV/half-arith.ll index b033c75eeadd8..27829f2b65759 100644 --- a/llvm/test/CodeGen/RISCV/half-arith.ll +++ b/llvm/test/CodeGen/RISCV/half-arith.ll @@ -2877,14 +2877,13 @@ define half @fsgnjx_f16(half %x, half %y) nounwind { ; RV32IZFHMIN-LABEL: fsgnjx_f16: ; RV32IZFHMIN: # %bb.0: ; RV32IZFHMIN-NEXT: lui a0, %hi(.LCPI23_0) -; RV32IZFHMIN-NEXT: flh fa5, %lo(.LCPI23_0)(a0) -; RV32IZFHMIN-NEXT: fmv.x.h a0, fa0 -; RV32IZFHMIN-NEXT: lui a1, 1048568 -; RV32IZFHMIN-NEXT: and a0, a0, a1 -; RV32IZFHMIN-NEXT: fmv.x.h a1, fa5 -; RV32IZFHMIN-NEXT: slli a1, a1, 17 -; RV32IZFHMIN-NEXT: srli a1, a1, 17 -; RV32IZFHMIN-NEXT: or a0, a1, a0 +; RV32IZFHMIN-NEXT: lhu a0, %lo(.LCPI23_0)(a0) +; RV32IZFHMIN-NEXT: fmv.x.h a1, fa0 +; RV32IZFHMIN-NEXT: lui a2, 1048568 +; RV32IZFHMIN-NEXT: and a1, a1, a2 +; RV32IZFHMIN-NEXT: slli a0, a0, 17 +; RV32IZFHMIN-NEXT: srli a0, a0, 17 +; RV32IZFHMIN-NEXT: or a0, a0, a1 ; RV32IZFHMIN-NEXT: fmv.h.x fa5, a0 ; RV32IZFHMIN-NEXT: fcvt.s.h fa5, fa5 ; RV32IZFHMIN-NEXT: fcvt.s.h fa4, fa1 @@ -2895,14 +2894,13 @@ define half @fsgnjx_f16(half %x, half %y) nounwind { ; RV64IZFHMIN-LABEL: fsgnjx_f16: ; RV64IZFHMIN: # %bb.0: ; RV64IZFHMIN-NEXT: lui a0, %hi(.LCPI23_0) -; RV64IZFHMIN-NEXT: flh fa5, %lo(.LCPI23_0)(a0) -; RV64IZFHMIN-NEXT: fmv.x.h a0, fa0 -; RV64IZFHMIN-NEXT: lui a1, 1048568 -; RV64IZFHMIN-NEXT: and a0, a0, a1 -; RV64IZFHMIN-NEXT: fmv.x.h a1, fa5 -; RV64IZFHMIN-NEXT: slli a1, a1, 49 -; RV64IZFHMIN-NEXT: srli a1, a1, 49 -; RV64IZFHMIN-NEXT: or a0, a1, a0 +; RV64IZFHMIN-NEXT: lhu a0, %lo(.LCPI23_0)(a0) +; RV64IZFHMIN-NEXT: fmv.x.h a1, fa0 +; RV64IZFHMIN-NEXT: lui a2, 1048568 +; RV64IZFHMIN-NEXT: and a1, a1, a2 +; RV64IZFHMIN-NEXT: slli a0, a0, 49 +; RV64IZFHMIN-NEXT: srli a0, a0, 49 +; RV64IZFHMIN-NEXT: or a0, a0, a1 ; RV64IZFHMIN-NEXT: fmv.h.x fa5, a0 ; RV64IZFHMIN-NEXT: fcvt.s.h fa5, fa5 ; RV64IZFHMIN-NEXT: fcvt.s.h fa4, fa1 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec-bf16.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec-bf16.ll index 170e71af09b49..727e03125176a 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec-bf16.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec-bf16.ll @@ -40,8 +40,7 @@ define <4 x bfloat> @splat_idx_v4bf16(<4 x bfloat> %v, i64 %idx) { ; RV32-ZFBFMIN-NEXT: vl1r.v v8, (a2) # Unknown-size Folded Reload ; RV32-ZFBFMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma ; RV32-ZFBFMIN-NEXT: 
vse16.v v8, (a1) -; RV32-ZFBFMIN-NEXT: flh fa5, 0(a0) -; RV32-ZFBFMIN-NEXT: fmv.x.h a0, fa5 +; RV32-ZFBFMIN-NEXT: lh a0, 0(a0) ; RV32-ZFBFMIN-NEXT: vmv.v.x v8, a0 ; RV32-ZFBFMIN-NEXT: csrr a0, vlenb ; RV32-ZFBFMIN-NEXT: slli a0, a0, 1 @@ -71,8 +70,7 @@ define <4 x bfloat> @splat_idx_v4bf16(<4 x bfloat> %v, i64 %idx) { ; RV64-ZFBFMIN-NEXT: vl1r.v v8, (a2) # Unknown-size Folded Reload ; RV64-ZFBFMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma ; RV64-ZFBFMIN-NEXT: vse16.v v8, (a1) -; RV64-ZFBFMIN-NEXT: flh fa5, 0(a0) -; RV64-ZFBFMIN-NEXT: fmv.x.h a0, fa5 +; RV64-ZFBFMIN-NEXT: lh a0, 0(a0) ; RV64-ZFBFMIN-NEXT: vmv.v.x v8, a0 ; RV64-ZFBFMIN-NEXT: csrr a0, vlenb ; RV64-ZFBFMIN-NEXT: slli a0, a0, 1 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll index b5d3e2cd776f2..bf2eb3ff0261a 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll @@ -220,8 +220,7 @@ define <4 x half> @splat_idx_v4f16(<4 x half> %v, i64 %idx) { ; RV32-ZFHMIN-NEXT: vl1r.v v8, (a2) # Unknown-size Folded Reload ; RV32-ZFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma ; RV32-ZFHMIN-NEXT: vse16.v v8, (a1) -; RV32-ZFHMIN-NEXT: flh fa5, 0(a0) -; RV32-ZFHMIN-NEXT: fmv.x.h a0, fa5 +; RV32-ZFHMIN-NEXT: lh a0, 0(a0) ; RV32-ZFHMIN-NEXT: vmv.v.x v8, a0 ; RV32-ZFHMIN-NEXT: csrr a0, vlenb ; RV32-ZFHMIN-NEXT: slli a0, a0, 1 @@ -251,8 +250,7 @@ define <4 x half> @splat_idx_v4f16(<4 x half> %v, i64 %idx) { ; RV64-ZFHMIN-NEXT: vl1r.v v8, (a2) # Unknown-size Folded Reload ; RV64-ZFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma ; RV64-ZFHMIN-NEXT: vse16.v v8, (a1) -; RV64-ZFHMIN-NEXT: flh fa5, 0(a0) -; RV64-ZFHMIN-NEXT: fmv.x.h a0, fa5 +; RV64-ZFHMIN-NEXT: lh a0, 0(a0) ; RV64-ZFHMIN-NEXT: vmv.v.x v8, a0 ; RV64-ZFHMIN-NEXT: csrr a0, vlenb ; RV64-ZFHMIN-NEXT: slli a0, a0, 1 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll index 5ab8eab091c2e..d665d23dec68a 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll @@ -516,41 +516,33 @@ define void @fabs_v8f16(ptr %x) { ; ZVFHMIN-RV32-NEXT: vle16.v v8, (a0) ; ZVFHMIN-RV32-NEXT: mv a1, sp ; ZVFHMIN-RV32-NEXT: vse16.v v8, (a1) -; ZVFHMIN-RV32-NEXT: flh fa5, 2(sp) -; ZVFHMIN-RV32-NEXT: flh fa4, 0(sp) -; ZVFHMIN-RV32-NEXT: flh fa3, 4(sp) -; ZVFHMIN-RV32-NEXT: fmv.x.h a1, fa5 -; ZVFHMIN-RV32-NEXT: fmv.x.h a2, fa4 -; ZVFHMIN-RV32-NEXT: lui a3, 8 -; ZVFHMIN-RV32-NEXT: fmv.x.h a4, fa3 -; ZVFHMIN-RV32-NEXT: flh fa5, 6(sp) -; ZVFHMIN-RV32-NEXT: addi a3, a3, -1 -; ZVFHMIN-RV32-NEXT: and a2, a2, a3 -; ZVFHMIN-RV32-NEXT: vmv.v.x v8, a2 -; ZVFHMIN-RV32-NEXT: fmv.x.h a2, fa5 -; ZVFHMIN-RV32-NEXT: flh fa5, 10(sp) -; ZVFHMIN-RV32-NEXT: and a1, a1, a3 +; ZVFHMIN-RV32-NEXT: lhu a1, 2(sp) +; ZVFHMIN-RV32-NEXT: lui a2, 8 +; ZVFHMIN-RV32-NEXT: lhu a3, 0(sp) +; ZVFHMIN-RV32-NEXT: addi a2, a2, -1 +; ZVFHMIN-RV32-NEXT: and a1, a1, a2 +; ZVFHMIN-RV32-NEXT: lhu a4, 4(sp) +; ZVFHMIN-RV32-NEXT: and a3, a3, a2 +; ZVFHMIN-RV32-NEXT: vmv.v.x v8, a3 ; ZVFHMIN-RV32-NEXT: vslide1down.vx v8, v8, a1 -; ZVFHMIN-RV32-NEXT: and a4, a4, a3 -; ZVFHMIN-RV32-NEXT: fmv.x.h a1, fa5 -; ZVFHMIN-RV32-NEXT: flh fa5, 8(sp) +; ZVFHMIN-RV32-NEXT: and a4, a4, a2 +; ZVFHMIN-RV32-NEXT: lhu a1, 6(sp) ; ZVFHMIN-RV32-NEXT: vslide1down.vx v8, v8, a4 -; ZVFHMIN-RV32-NEXT: and a2, a2, a3 -; ZVFHMIN-RV32-NEXT: vslide1down.vx v8, v8, a2 -; ZVFHMIN-RV32-NEXT: fmv.x.h a2, fa5 -; ZVFHMIN-RV32-NEXT: 
flh fa5, 12(sp) -; ZVFHMIN-RV32-NEXT: and a1, a1, a3 -; ZVFHMIN-RV32-NEXT: and a2, a2, a3 -; ZVFHMIN-RV32-NEXT: vmv.v.x v9, a2 -; ZVFHMIN-RV32-NEXT: fmv.x.h a2, fa5 -; ZVFHMIN-RV32-NEXT: flh fa5, 14(sp) +; ZVFHMIN-RV32-NEXT: lhu a3, 10(sp) +; ZVFHMIN-RV32-NEXT: lhu a4, 8(sp) +; ZVFHMIN-RV32-NEXT: and a1, a1, a2 +; ZVFHMIN-RV32-NEXT: vslide1down.vx v8, v8, a1 +; ZVFHMIN-RV32-NEXT: and a3, a3, a2 +; ZVFHMIN-RV32-NEXT: and a4, a4, a2 +; ZVFHMIN-RV32-NEXT: lhu a1, 12(sp) +; ZVFHMIN-RV32-NEXT: vmv.v.x v9, a4 +; ZVFHMIN-RV32-NEXT: lhu a4, 14(sp) +; ZVFHMIN-RV32-NEXT: vslide1down.vx v9, v9, a3 +; ZVFHMIN-RV32-NEXT: and a1, a1, a2 ; ZVFHMIN-RV32-NEXT: vslide1down.vx v9, v9, a1 -; ZVFHMIN-RV32-NEXT: and a2, a2, a3 -; ZVFHMIN-RV32-NEXT: vslide1down.vx v9, v9, a2 -; ZVFHMIN-RV32-NEXT: fmv.x.h a1, fa5 -; ZVFHMIN-RV32-NEXT: and a1, a1, a3 +; ZVFHMIN-RV32-NEXT: and a2, a4, a2 ; ZVFHMIN-RV32-NEXT: vmv.v.i v0, 15 -; ZVFHMIN-RV32-NEXT: vslide1down.vx v9, v9, a1 +; ZVFHMIN-RV32-NEXT: vslide1down.vx v9, v9, a2 ; ZVFHMIN-RV32-NEXT: vslidedown.vi v9, v8, 4, v0.t ; ZVFHMIN-RV32-NEXT: vse16.v v9, (a0) ; ZVFHMIN-RV32-NEXT: addi sp, sp, 16 @@ -564,41 +556,33 @@ define void @fabs_v8f16(ptr %x) { ; ZVFHMIN-RV64-NEXT: vle16.v v8, (a0) ; ZVFHMIN-RV64-NEXT: mv a1, sp ; ZVFHMIN-RV64-NEXT: vse16.v v8, (a1) -; ZVFHMIN-RV64-NEXT: flh fa5, 2(sp) -; ZVFHMIN-RV64-NEXT: flh fa4, 0(sp) -; ZVFHMIN-RV64-NEXT: flh fa3, 4(sp) -; ZVFHMIN-RV64-NEXT: fmv.x.h a1, fa5 -; ZVFHMIN-RV64-NEXT: fmv.x.h a2, fa4 -; ZVFHMIN-RV64-NEXT: lui a3, 8 -; ZVFHMIN-RV64-NEXT: fmv.x.h a4, fa3 -; ZVFHMIN-RV64-NEXT: flh fa5, 6(sp) -; ZVFHMIN-RV64-NEXT: addiw a3, a3, -1 -; ZVFHMIN-RV64-NEXT: and a2, a2, a3 -; ZVFHMIN-RV64-NEXT: vmv.v.x v8, a2 -; ZVFHMIN-RV64-NEXT: fmv.x.h a2, fa5 -; ZVFHMIN-RV64-NEXT: flh fa5, 10(sp) -; ZVFHMIN-RV64-NEXT: and a1, a1, a3 +; ZVFHMIN-RV64-NEXT: lhu a1, 2(sp) +; ZVFHMIN-RV64-NEXT: lui a2, 8 +; ZVFHMIN-RV64-NEXT: lhu a3, 0(sp) +; ZVFHMIN-RV64-NEXT: addiw a2, a2, -1 +; ZVFHMIN-RV64-NEXT: and a1, a1, a2 +; ZVFHMIN-RV64-NEXT: lhu a4, 4(sp) +; ZVFHMIN-RV64-NEXT: and a3, a3, a2 +; ZVFHMIN-RV64-NEXT: vmv.v.x v8, a3 ; ZVFHMIN-RV64-NEXT: vslide1down.vx v8, v8, a1 -; ZVFHMIN-RV64-NEXT: and a4, a4, a3 -; ZVFHMIN-RV64-NEXT: fmv.x.h a1, fa5 -; ZVFHMIN-RV64-NEXT: flh fa5, 8(sp) +; ZVFHMIN-RV64-NEXT: and a4, a4, a2 +; ZVFHMIN-RV64-NEXT: lhu a1, 6(sp) ; ZVFHMIN-RV64-NEXT: vslide1down.vx v8, v8, a4 -; ZVFHMIN-RV64-NEXT: and a2, a2, a3 -; ZVFHMIN-RV64-NEXT: vslide1down.vx v8, v8, a2 -; ZVFHMIN-RV64-NEXT: fmv.x.h a2, fa5 -; ZVFHMIN-RV64-NEXT: flh fa5, 12(sp) -; ZVFHMIN-RV64-NEXT: and a1, a1, a3 -; ZVFHMIN-RV64-NEXT: and a2, a2, a3 -; ZVFHMIN-RV64-NEXT: vmv.v.x v9, a2 -; ZVFHMIN-RV64-NEXT: fmv.x.h a2, fa5 -; ZVFHMIN-RV64-NEXT: flh fa5, 14(sp) +; ZVFHMIN-RV64-NEXT: lhu a3, 10(sp) +; ZVFHMIN-RV64-NEXT: lhu a4, 8(sp) +; ZVFHMIN-RV64-NEXT: and a1, a1, a2 +; ZVFHMIN-RV64-NEXT: vslide1down.vx v8, v8, a1 +; ZVFHMIN-RV64-NEXT: and a3, a3, a2 +; ZVFHMIN-RV64-NEXT: and a4, a4, a2 +; ZVFHMIN-RV64-NEXT: lhu a1, 12(sp) +; ZVFHMIN-RV64-NEXT: vmv.v.x v9, a4 +; ZVFHMIN-RV64-NEXT: lhu a4, 14(sp) +; ZVFHMIN-RV64-NEXT: vslide1down.vx v9, v9, a3 +; ZVFHMIN-RV64-NEXT: and a1, a1, a2 ; ZVFHMIN-RV64-NEXT: vslide1down.vx v9, v9, a1 -; ZVFHMIN-RV64-NEXT: and a2, a2, a3 -; ZVFHMIN-RV64-NEXT: vslide1down.vx v9, v9, a2 -; ZVFHMIN-RV64-NEXT: fmv.x.h a1, fa5 -; ZVFHMIN-RV64-NEXT: and a1, a1, a3 +; ZVFHMIN-RV64-NEXT: and a2, a4, a2 ; ZVFHMIN-RV64-NEXT: vmv.v.i v0, 15 -; ZVFHMIN-RV64-NEXT: vslide1down.vx v9, v9, a1 +; ZVFHMIN-RV64-NEXT: vslide1down.vx v9, v9, a2 ; 
ZVFHMIN-RV64-NEXT: vslidedown.vi v9, v8, 4, v0.t ; ZVFHMIN-RV64-NEXT: vse16.v v9, (a0) ; ZVFHMIN-RV64-NEXT: addi sp, sp, 16 @@ -628,41 +612,33 @@ define void @fabs_v6f16(ptr %x) { ; ZVFHMIN-RV32-NEXT: mv a1, sp ; ZVFHMIN-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma ; ZVFHMIN-RV32-NEXT: vse16.v v8, (a1) -; ZVFHMIN-RV32-NEXT: flh fa5, 2(sp) -; ZVFHMIN-RV32-NEXT: flh fa4, 0(sp) -; ZVFHMIN-RV32-NEXT: flh fa3, 4(sp) -; ZVFHMIN-RV32-NEXT: fmv.x.h a1, fa5 -; ZVFHMIN-RV32-NEXT: fmv.x.h a2, fa4 -; ZVFHMIN-RV32-NEXT: lui a3, 8 -; ZVFHMIN-RV32-NEXT: fmv.x.h a4, fa3 -; ZVFHMIN-RV32-NEXT: flh fa5, 6(sp) -; ZVFHMIN-RV32-NEXT: addi a3, a3, -1 -; ZVFHMIN-RV32-NEXT: and a2, a2, a3 -; ZVFHMIN-RV32-NEXT: vmv.v.x v8, a2 -; ZVFHMIN-RV32-NEXT: fmv.x.h a2, fa5 -; ZVFHMIN-RV32-NEXT: flh fa5, 10(sp) -; ZVFHMIN-RV32-NEXT: and a1, a1, a3 +; ZVFHMIN-RV32-NEXT: lhu a1, 2(sp) +; ZVFHMIN-RV32-NEXT: lui a2, 8 +; ZVFHMIN-RV32-NEXT: lhu a3, 0(sp) +; ZVFHMIN-RV32-NEXT: addi a2, a2, -1 +; ZVFHMIN-RV32-NEXT: and a1, a1, a2 +; ZVFHMIN-RV32-NEXT: lhu a4, 4(sp) +; ZVFHMIN-RV32-NEXT: and a3, a3, a2 +; ZVFHMIN-RV32-NEXT: vmv.v.x v8, a3 ; ZVFHMIN-RV32-NEXT: vslide1down.vx v8, v8, a1 -; ZVFHMIN-RV32-NEXT: and a4, a4, a3 -; ZVFHMIN-RV32-NEXT: fmv.x.h a1, fa5 -; ZVFHMIN-RV32-NEXT: flh fa5, 8(sp) +; ZVFHMIN-RV32-NEXT: and a4, a4, a2 +; ZVFHMIN-RV32-NEXT: lhu a1, 6(sp) ; ZVFHMIN-RV32-NEXT: vslide1down.vx v8, v8, a4 -; ZVFHMIN-RV32-NEXT: and a2, a2, a3 -; ZVFHMIN-RV32-NEXT: vslide1down.vx v8, v8, a2 -; ZVFHMIN-RV32-NEXT: fmv.x.h a2, fa5 -; ZVFHMIN-RV32-NEXT: flh fa5, 12(sp) -; ZVFHMIN-RV32-NEXT: and a1, a1, a3 -; ZVFHMIN-RV32-NEXT: and a2, a2, a3 -; ZVFHMIN-RV32-NEXT: vmv.v.x v9, a2 -; ZVFHMIN-RV32-NEXT: fmv.x.h a2, fa5 -; ZVFHMIN-RV32-NEXT: flh fa5, 14(sp) +; ZVFHMIN-RV32-NEXT: lhu a3, 10(sp) +; ZVFHMIN-RV32-NEXT: lhu a4, 8(sp) +; ZVFHMIN-RV32-NEXT: and a1, a1, a2 +; ZVFHMIN-RV32-NEXT: vslide1down.vx v8, v8, a1 +; ZVFHMIN-RV32-NEXT: and a3, a3, a2 +; ZVFHMIN-RV32-NEXT: and a4, a4, a2 +; ZVFHMIN-RV32-NEXT: lhu a1, 12(sp) +; ZVFHMIN-RV32-NEXT: vmv.v.x v9, a4 +; ZVFHMIN-RV32-NEXT: lhu a4, 14(sp) +; ZVFHMIN-RV32-NEXT: vslide1down.vx v9, v9, a3 +; ZVFHMIN-RV32-NEXT: and a1, a1, a2 ; ZVFHMIN-RV32-NEXT: vslide1down.vx v9, v9, a1 -; ZVFHMIN-RV32-NEXT: and a2, a2, a3 -; ZVFHMIN-RV32-NEXT: vslide1down.vx v9, v9, a2 -; ZVFHMIN-RV32-NEXT: fmv.x.h a1, fa5 -; ZVFHMIN-RV32-NEXT: and a1, a1, a3 +; ZVFHMIN-RV32-NEXT: and a2, a4, a2 ; ZVFHMIN-RV32-NEXT: vmv.v.i v0, 15 -; ZVFHMIN-RV32-NEXT: vslide1down.vx v9, v9, a1 +; ZVFHMIN-RV32-NEXT: vslide1down.vx v9, v9, a2 ; ZVFHMIN-RV32-NEXT: vsetivli zero, 6, e16, mf2, ta, mu ; ZVFHMIN-RV32-NEXT: vslidedown.vi v9, v8, 4, v0.t ; ZVFHMIN-RV32-NEXT: vse16.v v9, (a0) @@ -678,41 +654,33 @@ define void @fabs_v6f16(ptr %x) { ; ZVFHMIN-RV64-NEXT: mv a1, sp ; ZVFHMIN-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma ; ZVFHMIN-RV64-NEXT: vse16.v v8, (a1) -; ZVFHMIN-RV64-NEXT: flh fa5, 2(sp) -; ZVFHMIN-RV64-NEXT: flh fa4, 0(sp) -; ZVFHMIN-RV64-NEXT: flh fa3, 4(sp) -; ZVFHMIN-RV64-NEXT: fmv.x.h a1, fa5 -; ZVFHMIN-RV64-NEXT: fmv.x.h a2, fa4 -; ZVFHMIN-RV64-NEXT: lui a3, 8 -; ZVFHMIN-RV64-NEXT: fmv.x.h a4, fa3 -; ZVFHMIN-RV64-NEXT: flh fa5, 6(sp) -; ZVFHMIN-RV64-NEXT: addiw a3, a3, -1 -; ZVFHMIN-RV64-NEXT: and a2, a2, a3 -; ZVFHMIN-RV64-NEXT: vmv.v.x v8, a2 -; ZVFHMIN-RV64-NEXT: fmv.x.h a2, fa5 -; ZVFHMIN-RV64-NEXT: flh fa5, 10(sp) -; ZVFHMIN-RV64-NEXT: and a1, a1, a3 +; ZVFHMIN-RV64-NEXT: lhu a1, 2(sp) +; ZVFHMIN-RV64-NEXT: lui a2, 8 +; ZVFHMIN-RV64-NEXT: lhu a3, 0(sp) +; ZVFHMIN-RV64-NEXT: addiw a2, a2, -1 +; ZVFHMIN-RV64-NEXT: 
and a1, a1, a2 +; ZVFHMIN-RV64-NEXT: lhu a4, 4(sp) +; ZVFHMIN-RV64-NEXT: and a3, a3, a2 +; ZVFHMIN-RV64-NEXT: vmv.v.x v8, a3 ; ZVFHMIN-RV64-NEXT: vslide1down.vx v8, v8, a1 -; ZVFHMIN-RV64-NEXT: and a4, a4, a3 -; ZVFHMIN-RV64-NEXT: fmv.x.h a1, fa5 -; ZVFHMIN-RV64-NEXT: flh fa5, 8(sp) +; ZVFHMIN-RV64-NEXT: and a4, a4, a2 +; ZVFHMIN-RV64-NEXT: lhu a1, 6(sp) ; ZVFHMIN-RV64-NEXT: vslide1down.vx v8, v8, a4 -; ZVFHMIN-RV64-NEXT: and a2, a2, a3 -; ZVFHMIN-RV64-NEXT: vslide1down.vx v8, v8, a2 -; ZVFHMIN-RV64-NEXT: fmv.x.h a2, fa5 -; ZVFHMIN-RV64-NEXT: flh fa5, 12(sp) -; ZVFHMIN-RV64-NEXT: and a1, a1, a3 -; ZVFHMIN-RV64-NEXT: and a2, a2, a3 -; ZVFHMIN-RV64-NEXT: vmv.v.x v9, a2 -; ZVFHMIN-RV64-NEXT: fmv.x.h a2, fa5 -; ZVFHMIN-RV64-NEXT: flh fa5, 14(sp) +; ZVFHMIN-RV64-NEXT: lhu a3, 10(sp) +; ZVFHMIN-RV64-NEXT: lhu a4, 8(sp) +; ZVFHMIN-RV64-NEXT: and a1, a1, a2 +; ZVFHMIN-RV64-NEXT: vslide1down.vx v8, v8, a1 +; ZVFHMIN-RV64-NEXT: and a3, a3, a2 +; ZVFHMIN-RV64-NEXT: and a4, a4, a2 +; ZVFHMIN-RV64-NEXT: lhu a1, 12(sp) +; ZVFHMIN-RV64-NEXT: vmv.v.x v9, a4 +; ZVFHMIN-RV64-NEXT: lhu a4, 14(sp) +; ZVFHMIN-RV64-NEXT: vslide1down.vx v9, v9, a3 +; ZVFHMIN-RV64-NEXT: and a1, a1, a2 ; ZVFHMIN-RV64-NEXT: vslide1down.vx v9, v9, a1 -; ZVFHMIN-RV64-NEXT: and a2, a2, a3 -; ZVFHMIN-RV64-NEXT: vslide1down.vx v9, v9, a2 -; ZVFHMIN-RV64-NEXT: fmv.x.h a1, fa5 -; ZVFHMIN-RV64-NEXT: and a1, a1, a3 +; ZVFHMIN-RV64-NEXT: and a2, a4, a2 ; ZVFHMIN-RV64-NEXT: vmv.v.i v0, 15 -; ZVFHMIN-RV64-NEXT: vslide1down.vx v9, v9, a1 +; ZVFHMIN-RV64-NEXT: vslide1down.vx v9, v9, a2 ; ZVFHMIN-RV64-NEXT: vsetivli zero, 6, e16, mf2, ta, mu ; ZVFHMIN-RV64-NEXT: vslidedown.vi v9, v8, 4, v0.t ; ZVFHMIN-RV64-NEXT: vse16.v v9, (a0) @@ -898,71 +866,55 @@ define void @copysign_v8f16(ptr %x, ptr %y) { ; ZVFHMIN-ZFHIN-RV32-NEXT: vse16.v v8, (a1) ; ZVFHMIN-ZFHIN-RV32-NEXT: mv a1, sp ; ZVFHMIN-ZFHIN-RV32-NEXT: vse16.v v9, (a1) -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 18(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa4, 2(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a2, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 16(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa3, 0(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a3, fa4 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa4, 20(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a4, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a5, fa3 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 4(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a6, fa4 +; ZVFHMIN-ZFHIN-RV32-NEXT: lh a2, 18(sp) ; ZVFHMIN-ZFHIN-RV32-NEXT: lui a1, 1048568 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a7, a2, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h t0, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 22(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: lui t1, 8 -; ZVFHMIN-ZFHIN-RV32-NEXT: addi a2, t1, -1 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a3, a3, a2 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h t2, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 6(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: or a3, a3, a7 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a4, a4, t1 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a5, a5, a2 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a7, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 26(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: or a4, a5, a4 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a3, a2, a1 +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a4, 2(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: lui a5, 8 +; ZVFHMIN-ZFHIN-RV32-NEXT: addi a2, a5, -1 +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a6, 16(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a7, 0(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: and a4, a4, a2 +; ZVFHMIN-ZFHIN-RV32-NEXT: or a3, a4, a3 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a4, a6, a5 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a6, a7, a2 +; ZVFHMIN-ZFHIN-RV32-NEXT: or a4, a6, a4 +; ZVFHMIN-ZFHIN-RV32-NEXT: 
lh a6, 20(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a7, 4(sp) ; ZVFHMIN-ZFHIN-RV32-NEXT: vmv.v.x v8, a4 ; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a3 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a3, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 10(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: and a3, a6, a1 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a4, a7, a2 +; ZVFHMIN-ZFHIN-RV32-NEXT: lh a6, 22(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a7, 6(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: or a3, a4, a3 +; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a3 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a3, a6, a1 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a4, a7, a2 +; ZVFHMIN-ZFHIN-RV32-NEXT: lh a6, 26(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: or a3, a4, a3 +; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a3 +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a3, 10(sp) ; ZVFHMIN-ZFHIN-RV32-NEXT: and a4, a6, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a5, t0, a2 +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a6, 24(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a7, 8(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: and a3, a3, a2 +; ZVFHMIN-ZFHIN-RV32-NEXT: or a3, a3, a4 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a4, a6, a5 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a5, a7, a2 ; ZVFHMIN-ZFHIN-RV32-NEXT: or a4, a5, a4 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a5, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 24(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a4 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a4, t2, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a6, a7, a2 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a7, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 8(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: or a4, a6, a4 -; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a4 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a3, a3, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a4, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 28(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: and a5, a5, a2 -; ZVFHMIN-ZFHIN-RV32-NEXT: or a3, a5, a3 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a5, a7, t1 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a6, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 12(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: and a4, a4, a2 -; ZVFHMIN-ZFHIN-RV32-NEXT: or a4, a4, a5 +; ZVFHMIN-ZFHIN-RV32-NEXT: lh a5, 28(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a6, 12(sp) ; ZVFHMIN-ZFHIN-RV32-NEXT: vmv.v.x v9, a4 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a4, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 30(sp) ; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v9, v9, a3 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a3, a6, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a4, a4, a2 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a5, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 14(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: or a3, a4, a3 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a5, a5, a1 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a3, a6, a2 +; ZVFHMIN-ZFHIN-RV32-NEXT: lh a4, 30(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a6, 14(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: or a3, a3, a5 ; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v9, v9, a3 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a1, a5, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a3, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a2, a3, a2 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a1, a4, a1 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a2, a6, a2 ; ZVFHMIN-ZFHIN-RV32-NEXT: or a1, a2, a1 ; ZVFHMIN-ZFHIN-RV32-NEXT: vmv.v.i v0, 15 ; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v9, v9, a1 @@ -982,71 +934,55 @@ define void @copysign_v8f16(ptr %x, ptr %y) { ; ZVFHMIN-ZFHIN-RV64-NEXT: vse16.v v8, (a1) ; ZVFHMIN-ZFHIN-RV64-NEXT: mv a1, sp ; ZVFHMIN-ZFHIN-RV64-NEXT: vse16.v v9, (a1) -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 18(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa4, 2(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a2, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 16(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa3, 0(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a3, fa4 
-; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa4, 20(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a4, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a5, fa3 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 4(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a6, fa4 +; ZVFHMIN-ZFHIN-RV64-NEXT: lh a2, 18(sp) ; ZVFHMIN-ZFHIN-RV64-NEXT: lui a1, 1048568 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a7, a2, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h t0, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 22(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: lui t1, 8 -; ZVFHMIN-ZFHIN-RV64-NEXT: addiw a2, t1, -1 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a3, a3, a2 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h t2, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 6(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: or a3, a3, a7 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a4, a4, t1 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a5, a5, a2 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a7, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 26(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: or a4, a5, a4 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a3, a2, a1 +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a4, 2(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: lui a5, 8 +; ZVFHMIN-ZFHIN-RV64-NEXT: addiw a2, a5, -1 +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a6, 16(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a7, 0(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: and a4, a4, a2 +; ZVFHMIN-ZFHIN-RV64-NEXT: or a3, a4, a3 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a4, a6, a5 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a6, a7, a2 +; ZVFHMIN-ZFHIN-RV64-NEXT: or a4, a6, a4 +; ZVFHMIN-ZFHIN-RV64-NEXT: lh a6, 20(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a7, 4(sp) ; ZVFHMIN-ZFHIN-RV64-NEXT: vmv.v.x v8, a4 ; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v8, v8, a3 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a3, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 10(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: and a3, a6, a1 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a4, a7, a2 +; ZVFHMIN-ZFHIN-RV64-NEXT: lh a6, 22(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a7, 6(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: or a3, a4, a3 +; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v8, v8, a3 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a3, a6, a1 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a4, a7, a2 +; ZVFHMIN-ZFHIN-RV64-NEXT: lh a6, 26(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: or a3, a4, a3 +; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v8, v8, a3 +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a3, 10(sp) ; ZVFHMIN-ZFHIN-RV64-NEXT: and a4, a6, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a5, t0, a2 +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a6, 24(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a7, 8(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: and a3, a3, a2 +; ZVFHMIN-ZFHIN-RV64-NEXT: or a3, a3, a4 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a4, a6, a5 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a5, a7, a2 ; ZVFHMIN-ZFHIN-RV64-NEXT: or a4, a5, a4 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a5, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 24(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v8, v8, a4 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a4, t2, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a6, a7, a2 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a7, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 8(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: or a4, a6, a4 -; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v8, v8, a4 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a3, a3, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a4, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 28(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: and a5, a5, a2 -; ZVFHMIN-ZFHIN-RV64-NEXT: or a3, a5, a3 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a5, a7, t1 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a6, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 12(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: and a4, a4, a2 -; ZVFHMIN-ZFHIN-RV64-NEXT: or a4, a4, a5 +; ZVFHMIN-ZFHIN-RV64-NEXT: lh a5, 28(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a6, 12(sp) ; ZVFHMIN-ZFHIN-RV64-NEXT: vmv.v.x v9, a4 -; ZVFHMIN-ZFHIN-RV64-NEXT: 
fmv.x.h a4, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 30(sp) ; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v9, v9, a3 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a3, a6, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a4, a4, a2 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a5, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 14(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: or a3, a4, a3 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a5, a5, a1 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a3, a6, a2 +; ZVFHMIN-ZFHIN-RV64-NEXT: lh a4, 30(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a6, 14(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: or a3, a3, a5 ; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v9, v9, a3 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a1, a5, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a3, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a2, a3, a2 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a1, a4, a1 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a2, a6, a2 ; ZVFHMIN-ZFHIN-RV64-NEXT: or a1, a2, a1 ; ZVFHMIN-ZFHIN-RV64-NEXT: vmv.v.i v0, 15 ; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v9, v9, a1 @@ -1202,71 +1138,55 @@ define void @copysign_v6f16(ptr %x, ptr %y) { ; ZVFHMIN-ZFHIN-RV32-NEXT: vse16.v v8, (a1) ; ZVFHMIN-ZFHIN-RV32-NEXT: mv a1, sp ; ZVFHMIN-ZFHIN-RV32-NEXT: vse16.v v9, (a1) -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 18(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa4, 2(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a2, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 16(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa3, 0(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a3, fa4 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa4, 20(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a4, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a5, fa3 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 4(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a6, fa4 +; ZVFHMIN-ZFHIN-RV32-NEXT: lh a2, 18(sp) ; ZVFHMIN-ZFHIN-RV32-NEXT: lui a1, 1048568 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a7, a2, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h t0, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 22(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: lui t1, 8 -; ZVFHMIN-ZFHIN-RV32-NEXT: addi a2, t1, -1 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a3, a3, a2 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h t2, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 6(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: or a3, a3, a7 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a4, a4, t1 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a5, a5, a2 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a7, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 26(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: or a4, a5, a4 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a3, a2, a1 +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a4, 2(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: lui a5, 8 +; ZVFHMIN-ZFHIN-RV32-NEXT: addi a2, a5, -1 +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a6, 16(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a7, 0(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: and a4, a4, a2 +; ZVFHMIN-ZFHIN-RV32-NEXT: or a3, a4, a3 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a4, a6, a5 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a6, a7, a2 +; ZVFHMIN-ZFHIN-RV32-NEXT: or a4, a6, a4 +; ZVFHMIN-ZFHIN-RV32-NEXT: lh a6, 20(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a7, 4(sp) ; ZVFHMIN-ZFHIN-RV32-NEXT: vmv.v.x v8, a4 ; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a3 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a3, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 10(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: and a3, a6, a1 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a4, a7, a2 +; ZVFHMIN-ZFHIN-RV32-NEXT: lh a6, 22(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a7, 6(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: or a3, a4, a3 +; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a3 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a3, a6, a1 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a4, a7, a2 +; ZVFHMIN-ZFHIN-RV32-NEXT: lh a6, 26(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: or a3, a4, a3 +; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a3 +; 
ZVFHMIN-ZFHIN-RV32-NEXT: lhu a3, 10(sp) ; ZVFHMIN-ZFHIN-RV32-NEXT: and a4, a6, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a5, t0, a2 +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a6, 24(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a7, 8(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: and a3, a3, a2 +; ZVFHMIN-ZFHIN-RV32-NEXT: or a3, a3, a4 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a4, a6, a5 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a5, a7, a2 ; ZVFHMIN-ZFHIN-RV32-NEXT: or a4, a5, a4 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a5, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 24(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a4 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a4, t2, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a6, a7, a2 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a7, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 8(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: or a4, a6, a4 -; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a4 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a3, a3, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a4, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 28(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: and a5, a5, a2 -; ZVFHMIN-ZFHIN-RV32-NEXT: or a3, a5, a3 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a5, a7, t1 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a6, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 12(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: and a4, a4, a2 -; ZVFHMIN-ZFHIN-RV32-NEXT: or a4, a4, a5 +; ZVFHMIN-ZFHIN-RV32-NEXT: lh a5, 28(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a6, 12(sp) ; ZVFHMIN-ZFHIN-RV32-NEXT: vmv.v.x v9, a4 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a4, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 30(sp) ; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v9, v9, a3 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a3, a6, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a4, a4, a2 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a5, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 14(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: or a3, a4, a3 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a5, a5, a1 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a3, a6, a2 +; ZVFHMIN-ZFHIN-RV32-NEXT: lh a4, 30(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a6, 14(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: or a3, a3, a5 ; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v9, v9, a3 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a1, a5, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a3, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a2, a3, a2 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a1, a4, a1 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a2, a6, a2 ; ZVFHMIN-ZFHIN-RV32-NEXT: or a1, a2, a1 ; ZVFHMIN-ZFHIN-RV32-NEXT: vmv.v.i v0, 15 ; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v9, v9, a1 @@ -1288,71 +1208,55 @@ define void @copysign_v6f16(ptr %x, ptr %y) { ; ZVFHMIN-ZFHIN-RV64-NEXT: vse16.v v8, (a1) ; ZVFHMIN-ZFHIN-RV64-NEXT: mv a1, sp ; ZVFHMIN-ZFHIN-RV64-NEXT: vse16.v v9, (a1) -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 18(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa4, 2(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a2, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 16(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa3, 0(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a3, fa4 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa4, 20(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a4, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a5, fa3 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 4(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a6, fa4 +; ZVFHMIN-ZFHIN-RV64-NEXT: lh a2, 18(sp) ; ZVFHMIN-ZFHIN-RV64-NEXT: lui a1, 1048568 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a7, a2, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h t0, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 22(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: lui t1, 8 -; ZVFHMIN-ZFHIN-RV64-NEXT: addiw a2, t1, -1 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a3, a3, a2 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h t2, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 6(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: or a3, a3, a7 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a4, a4, 
t1 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a5, a5, a2 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a7, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 26(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: or a4, a5, a4 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a3, a2, a1 +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a4, 2(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: lui a5, 8 +; ZVFHMIN-ZFHIN-RV64-NEXT: addiw a2, a5, -1 +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a6, 16(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a7, 0(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: and a4, a4, a2 +; ZVFHMIN-ZFHIN-RV64-NEXT: or a3, a4, a3 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a4, a6, a5 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a6, a7, a2 +; ZVFHMIN-ZFHIN-RV64-NEXT: or a4, a6, a4 +; ZVFHMIN-ZFHIN-RV64-NEXT: lh a6, 20(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a7, 4(sp) ; ZVFHMIN-ZFHIN-RV64-NEXT: vmv.v.x v8, a4 ; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v8, v8, a3 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a3, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 10(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: and a3, a6, a1 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a4, a7, a2 +; ZVFHMIN-ZFHIN-RV64-NEXT: lh a6, 22(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a7, 6(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: or a3, a4, a3 +; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v8, v8, a3 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a3, a6, a1 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a4, a7, a2 +; ZVFHMIN-ZFHIN-RV64-NEXT: lh a6, 26(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: or a3, a4, a3 +; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v8, v8, a3 +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a3, 10(sp) ; ZVFHMIN-ZFHIN-RV64-NEXT: and a4, a6, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a5, t0, a2 +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a6, 24(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a7, 8(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: and a3, a3, a2 +; ZVFHMIN-ZFHIN-RV64-NEXT: or a3, a3, a4 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a4, a6, a5 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a5, a7, a2 ; ZVFHMIN-ZFHIN-RV64-NEXT: or a4, a5, a4 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a5, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 24(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v8, v8, a4 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a4, t2, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a6, a7, a2 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a7, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 8(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: or a4, a6, a4 -; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v8, v8, a4 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a3, a3, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a4, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 28(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: and a5, a5, a2 -; ZVFHMIN-ZFHIN-RV64-NEXT: or a3, a5, a3 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a5, a7, t1 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a6, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 12(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: and a4, a4, a2 -; ZVFHMIN-ZFHIN-RV64-NEXT: or a4, a4, a5 +; ZVFHMIN-ZFHIN-RV64-NEXT: lh a5, 28(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a6, 12(sp) ; ZVFHMIN-ZFHIN-RV64-NEXT: vmv.v.x v9, a4 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a4, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 30(sp) ; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v9, v9, a3 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a3, a6, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a4, a4, a2 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a5, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 14(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: or a3, a4, a3 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a5, a5, a1 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a3, a6, a2 +; ZVFHMIN-ZFHIN-RV64-NEXT: lh a4, 30(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a6, 14(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: or a3, a3, a5 ; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v9, v9, a3 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a1, a5, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a3, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a2, a3, a2 +; 
ZVFHMIN-ZFHIN-RV64-NEXT: and a1, a4, a1 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a2, a6, a2 ; ZVFHMIN-ZFHIN-RV64-NEXT: or a1, a2, a1 ; ZVFHMIN-ZFHIN-RV64-NEXT: vmv.v.i v0, 15 ; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v9, v9, a1 @@ -1521,50 +1425,42 @@ define void @copysign_vf_v8f16(ptr %x, half %y) { ; ZVFHMIN-ZFHIN-RV32-NEXT: vle16.v v8, (a0) ; ZVFHMIN-ZFHIN-RV32-NEXT: mv a1, sp ; ZVFHMIN-ZFHIN-RV32-NEXT: vse16.v v8, (a1) -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 2(sp) ; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a1, fa0 ; ZVFHMIN-ZFHIN-RV32-NEXT: lui a2, 1048568 ; ZVFHMIN-ZFHIN-RV32-NEXT: and a1, a1, a2 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a2, fa5 +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a2, 2(sp) ; ZVFHMIN-ZFHIN-RV32-NEXT: lui a3, 8 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 0(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a4, 0(sp) ; ZVFHMIN-ZFHIN-RV32-NEXT: addi a3, a3, -1 ; ZVFHMIN-ZFHIN-RV32-NEXT: and a2, a2, a3 ; ZVFHMIN-ZFHIN-RV32-NEXT: or a2, a2, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a4, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 4(sp) ; ZVFHMIN-ZFHIN-RV32-NEXT: and a4, a4, a3 ; ZVFHMIN-ZFHIN-RV32-NEXT: or a4, a4, a1 +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a5, 4(sp) ; ZVFHMIN-ZFHIN-RV32-NEXT: vmv.v.x v8, a4 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a4, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 6(sp) ; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a2 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a4, a4, a3 -; ZVFHMIN-ZFHIN-RV32-NEXT: or a4, a4, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a2, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 10(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a4 +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a2, 6(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: and a5, a5, a3 +; ZVFHMIN-ZFHIN-RV32-NEXT: or a5, a5, a1 +; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a5 ; ZVFHMIN-ZFHIN-RV32-NEXT: and a2, a2, a3 +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a4, 10(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a5, 8(sp) ; ZVFHMIN-ZFHIN-RV32-NEXT: or a2, a2, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a4, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 8(sp) ; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a2 ; ZVFHMIN-ZFHIN-RV32-NEXT: and a4, a4, a3 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a5, a5, a3 +; ZVFHMIN-ZFHIN-RV32-NEXT: or a5, a5, a1 +; ZVFHMIN-ZFHIN-RV32-NEXT: vmv.v.x v9, a5 +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a2, 12(sp) ; ZVFHMIN-ZFHIN-RV32-NEXT: or a4, a4, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a2, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a2, a2, a3 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 12(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: or a2, a2, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: vmv.v.x v9, a2 ; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v9, v9, a4 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a2, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 14(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a4, 14(sp) ; ZVFHMIN-ZFHIN-RV32-NEXT: and a2, a2, a3 ; ZVFHMIN-ZFHIN-RV32-NEXT: or a2, a2, a1 ; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v9, v9, a2 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a2, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a2, a2, a3 -; ZVFHMIN-ZFHIN-RV32-NEXT: or a1, a2, a1 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a3, a4, a3 +; ZVFHMIN-ZFHIN-RV32-NEXT: or a1, a3, a1 ; ZVFHMIN-ZFHIN-RV32-NEXT: vmv.v.i v0, 15 ; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v9, v9, a1 ; ZVFHMIN-ZFHIN-RV32-NEXT: vslidedown.vi v9, v8, 4, v0.t @@ -1580,50 +1476,42 @@ define void @copysign_vf_v8f16(ptr %x, half %y) { ; ZVFHMIN-ZFHIN-RV64-NEXT: vle16.v v8, (a0) ; ZVFHMIN-ZFHIN-RV64-NEXT: mv a1, sp ; ZVFHMIN-ZFHIN-RV64-NEXT: vse16.v v8, (a1) -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 2(sp) ; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a1, fa0 ; ZVFHMIN-ZFHIN-RV64-NEXT: lui a2, 1048568 ; ZVFHMIN-ZFHIN-RV64-NEXT: and a1, 
a1, a2 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a2, fa5 +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a2, 2(sp) ; ZVFHMIN-ZFHIN-RV64-NEXT: lui a3, 8 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 0(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a4, 0(sp) ; ZVFHMIN-ZFHIN-RV64-NEXT: addiw a3, a3, -1 ; ZVFHMIN-ZFHIN-RV64-NEXT: and a2, a2, a3 ; ZVFHMIN-ZFHIN-RV64-NEXT: or a2, a2, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a4, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 4(sp) ; ZVFHMIN-ZFHIN-RV64-NEXT: and a4, a4, a3 ; ZVFHMIN-ZFHIN-RV64-NEXT: or a4, a4, a1 +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a5, 4(sp) ; ZVFHMIN-ZFHIN-RV64-NEXT: vmv.v.x v8, a4 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a4, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 6(sp) ; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v8, v8, a2 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a4, a4, a3 -; ZVFHMIN-ZFHIN-RV64-NEXT: or a4, a4, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a2, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 10(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v8, v8, a4 +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a2, 6(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: and a5, a5, a3 +; ZVFHMIN-ZFHIN-RV64-NEXT: or a5, a5, a1 +; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v8, v8, a5 ; ZVFHMIN-ZFHIN-RV64-NEXT: and a2, a2, a3 +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a4, 10(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a5, 8(sp) ; ZVFHMIN-ZFHIN-RV64-NEXT: or a2, a2, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a4, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 8(sp) ; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v8, v8, a2 ; ZVFHMIN-ZFHIN-RV64-NEXT: and a4, a4, a3 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a5, a5, a3 +; ZVFHMIN-ZFHIN-RV64-NEXT: or a5, a5, a1 +; ZVFHMIN-ZFHIN-RV64-NEXT: vmv.v.x v9, a5 +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a2, 12(sp) ; ZVFHMIN-ZFHIN-RV64-NEXT: or a4, a4, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a2, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a2, a2, a3 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 12(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: or a2, a2, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: vmv.v.x v9, a2 ; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v9, v9, a4 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a2, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 14(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a4, 14(sp) ; ZVFHMIN-ZFHIN-RV64-NEXT: and a2, a2, a3 ; ZVFHMIN-ZFHIN-RV64-NEXT: or a2, a2, a1 ; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v9, v9, a2 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a2, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a2, a2, a3 -; ZVFHMIN-ZFHIN-RV64-NEXT: or a1, a2, a1 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a3, a4, a3 +; ZVFHMIN-ZFHIN-RV64-NEXT: or a1, a3, a1 ; ZVFHMIN-ZFHIN-RV64-NEXT: vmv.v.i v0, 15 ; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v9, v9, a1 ; ZVFHMIN-ZFHIN-RV64-NEXT: vslidedown.vi v9, v8, 4, v0.t @@ -1752,54 +1640,46 @@ define void @copysign_vf_v6f16(ptr %x, half %y) { ; ZVFHMIN-ZFHIN-RV32-NEXT: mv a1, sp ; ZVFHMIN-ZFHIN-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma ; ZVFHMIN-ZFHIN-RV32-NEXT: vse16.v v8, (a1) -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 2(sp) ; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a1, fa0 ; ZVFHMIN-ZFHIN-RV32-NEXT: lui a2, 1048568 ; ZVFHMIN-ZFHIN-RV32-NEXT: and a1, a1, a2 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a3, fa5 +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a3, 2(sp) ; ZVFHMIN-ZFHIN-RV32-NEXT: lui a4, 8 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 0(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a5, 0(sp) ; ZVFHMIN-ZFHIN-RV32-NEXT: addi a4, a4, -1 ; ZVFHMIN-ZFHIN-RV32-NEXT: and a3, a3, a4 ; ZVFHMIN-ZFHIN-RV32-NEXT: or a3, a3, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a5, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 4(sp) ; ZVFHMIN-ZFHIN-RV32-NEXT: and a5, a5, a4 ; ZVFHMIN-ZFHIN-RV32-NEXT: or a5, a5, a1 +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a6, 4(sp) ; 
ZVFHMIN-ZFHIN-RV32-NEXT: vmv.v.x v8, a5 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a5, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 6(sp) ; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a3 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a5, a5, a4 +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a3, 6(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: and a5, a6, a4 ; ZVFHMIN-ZFHIN-RV32-NEXT: or a5, a5, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a3, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 10(sp) ; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a5 ; ZVFHMIN-ZFHIN-RV32-NEXT: and a3, a3, a4 +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a5, 10(sp) ; ZVFHMIN-ZFHIN-RV32-NEXT: or a3, a3, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a5, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 8(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a6, 8(sp) ; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a3 ; ZVFHMIN-ZFHIN-RV32-NEXT: and a5, a5, a4 ; ZVFHMIN-ZFHIN-RV32-NEXT: or a5, a5, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a3, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a3, a3, a4 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a3, a6, a4 ; ZVFHMIN-ZFHIN-RV32-NEXT: or a1, a3, a1 ; ZVFHMIN-ZFHIN-RV32-NEXT: vmv.v.x v9, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 12(sp) ; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v9, v9, a5 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a1, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a1, a1, a2 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a2, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 14(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: and a2, a2, a4 -; ZVFHMIN-ZFHIN-RV32-NEXT: or a2, a2, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v9, v9, a2 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a2, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a2, a2, a4 -; ZVFHMIN-ZFHIN-RV32-NEXT: or a1, a2, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: vmv.v.i v0, 15 +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a1, 12(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a3, fa5 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a2, a3, a2 +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a3, 14(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: and a1, a1, a4 +; ZVFHMIN-ZFHIN-RV32-NEXT: or a1, a1, a2 ; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v9, v9, a1 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a3, a3, a4 +; ZVFHMIN-ZFHIN-RV32-NEXT: or a2, a3, a2 +; ZVFHMIN-ZFHIN-RV32-NEXT: vmv.v.i v0, 15 +; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v9, v9, a2 ; ZVFHMIN-ZFHIN-RV32-NEXT: vsetivli zero, 6, e16, mf2, ta, mu ; ZVFHMIN-ZFHIN-RV32-NEXT: vslidedown.vi v9, v8, 4, v0.t ; ZVFHMIN-ZFHIN-RV32-NEXT: vse16.v v9, (a0) @@ -1815,54 +1695,46 @@ define void @copysign_vf_v6f16(ptr %x, half %y) { ; ZVFHMIN-ZFHIN-RV64-NEXT: mv a1, sp ; ZVFHMIN-ZFHIN-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma ; ZVFHMIN-ZFHIN-RV64-NEXT: vse16.v v8, (a1) -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 2(sp) ; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a1, fa0 ; ZVFHMIN-ZFHIN-RV64-NEXT: lui a2, 1048568 ; ZVFHMIN-ZFHIN-RV64-NEXT: and a1, a1, a2 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a3, fa5 +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a3, 2(sp) ; ZVFHMIN-ZFHIN-RV64-NEXT: lui a4, 8 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 0(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a5, 0(sp) ; ZVFHMIN-ZFHIN-RV64-NEXT: addiw a4, a4, -1 ; ZVFHMIN-ZFHIN-RV64-NEXT: and a3, a3, a4 ; ZVFHMIN-ZFHIN-RV64-NEXT: or a3, a3, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a5, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 4(sp) ; ZVFHMIN-ZFHIN-RV64-NEXT: and a5, a5, a4 ; ZVFHMIN-ZFHIN-RV64-NEXT: or a5, a5, a1 +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a6, 4(sp) ; ZVFHMIN-ZFHIN-RV64-NEXT: vmv.v.x v8, a5 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a5, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 6(sp) ; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v8, v8, a3 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a5, a5, a4 +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a3, 6(sp) +; 
ZVFHMIN-ZFHIN-RV64-NEXT: and a5, a6, a4 ; ZVFHMIN-ZFHIN-RV64-NEXT: or a5, a5, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a3, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 10(sp) ; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v8, v8, a5 ; ZVFHMIN-ZFHIN-RV64-NEXT: and a3, a3, a4 +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a5, 10(sp) ; ZVFHMIN-ZFHIN-RV64-NEXT: or a3, a3, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a5, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 8(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a6, 8(sp) ; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v8, v8, a3 ; ZVFHMIN-ZFHIN-RV64-NEXT: and a5, a5, a4 ; ZVFHMIN-ZFHIN-RV64-NEXT: or a5, a5, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a3, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a3, a3, a4 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a3, a6, a4 ; ZVFHMIN-ZFHIN-RV64-NEXT: or a1, a3, a1 ; ZVFHMIN-ZFHIN-RV64-NEXT: vmv.v.x v9, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 12(sp) ; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v9, v9, a5 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a1, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a1, a1, a2 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a2, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 14(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: and a2, a2, a4 -; ZVFHMIN-ZFHIN-RV64-NEXT: or a2, a2, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v9, v9, a2 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a2, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a2, a2, a4 -; ZVFHMIN-ZFHIN-RV64-NEXT: or a1, a2, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: vmv.v.i v0, 15 +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a1, 12(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a3, fa5 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a2, a3, a2 +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a3, 14(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: and a1, a1, a4 +; ZVFHMIN-ZFHIN-RV64-NEXT: or a1, a1, a2 ; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v9, v9, a1 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a3, a3, a4 +; ZVFHMIN-ZFHIN-RV64-NEXT: or a2, a3, a2 +; ZVFHMIN-ZFHIN-RV64-NEXT: vmv.v.i v0, 15 +; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v9, v9, a2 ; ZVFHMIN-ZFHIN-RV64-NEXT: vsetivli zero, 6, e16, mf2, ta, mu ; ZVFHMIN-ZFHIN-RV64-NEXT: vslidedown.vi v9, v8, 4, v0.t ; ZVFHMIN-ZFHIN-RV64-NEXT: vse16.v v9, (a0) @@ -2051,77 +1923,61 @@ define void @copysign_neg_v8f16(ptr %x, ptr %y) { ; ZVFHMIN-ZFHIN-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, mu ; ZVFHMIN-ZFHIN-RV32-NEXT: vle16.v v8, (a1) ; ZVFHMIN-ZFHIN-RV32-NEXT: vle16.v v9, (a0) -; ZVFHMIN-ZFHIN-RV32-NEXT: lui a1, 8 -; ZVFHMIN-ZFHIN-RV32-NEXT: vxor.vx v8, v8, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: mv a2, sp -; ZVFHMIN-ZFHIN-RV32-NEXT: vse16.v v9, (a2) -; ZVFHMIN-ZFHIN-RV32-NEXT: addi a2, sp, 16 -; ZVFHMIN-ZFHIN-RV32-NEXT: vse16.v v8, (a2) -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 2(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa4, 18(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a3, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 0(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a4, fa4 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa4, 16(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa3, 4(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a5, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 20(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a6, fa4 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a7, fa3 -; ZVFHMIN-ZFHIN-RV32-NEXT: addi a2, a1, -1 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h t0, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 6(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: and t1, a3, a2 -; ZVFHMIN-ZFHIN-RV32-NEXT: lui a3, 1048568 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a4, a4, a3 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h t2, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 22(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: or a4, t1, a4 +; ZVFHMIN-ZFHIN-RV32-NEXT: lui a3, 8 +; ZVFHMIN-ZFHIN-RV32-NEXT: vxor.vx v8, v8, a3 +; ZVFHMIN-ZFHIN-RV32-NEXT: mv a1, sp +; 
ZVFHMIN-ZFHIN-RV32-NEXT: vse16.v v9, (a1) +; ZVFHMIN-ZFHIN-RV32-NEXT: addi a1, sp, 16 +; ZVFHMIN-ZFHIN-RV32-NEXT: vse16.v v8, (a1) +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a2, 2(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: addi a1, a3, -1 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a4, a2, a1 +; ZVFHMIN-ZFHIN-RV32-NEXT: lh a5, 18(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: lui a2, 1048568 +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a6, 0(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a7, 16(sp) ; ZVFHMIN-ZFHIN-RV32-NEXT: and a5, a5, a2 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a6, a6, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h t1, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 10(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: or a4, a4, a5 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a5, a6, a1 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a6, a7, a3 ; ZVFHMIN-ZFHIN-RV32-NEXT: or a5, a5, a6 +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a6, 4(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: lh a7, 20(sp) ; ZVFHMIN-ZFHIN-RV32-NEXT: vmv.v.x v8, a5 ; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a4 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a4, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 26(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: and a4, a6, a1 ; ZVFHMIN-ZFHIN-RV32-NEXT: and a5, a7, a2 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a6, t0, a3 -; ZVFHMIN-ZFHIN-RV32-NEXT: or a5, a5, a6 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a6, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 8(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a5 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a5, t2, a2 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a7, t1, a3 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h t0, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 24(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: or a5, a5, a7 -; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a5 +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a6, 6(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: lh a7, 22(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: or a4, a4, a5 +; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a4 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a4, a6, a1 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a5, a7, a2 +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a6, 10(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: or a4, a4, a5 +; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a4 +; ZVFHMIN-ZFHIN-RV32-NEXT: lh a4, 26(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: and a5, a6, a1 +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a6, 8(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a7, 24(sp) ; ZVFHMIN-ZFHIN-RV32-NEXT: and a4, a4, a2 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a5, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 12(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: and a6, a6, a3 -; ZVFHMIN-ZFHIN-RV32-NEXT: or a4, a4, a6 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a1, a5, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a5, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 28(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: and a6, t0, a2 -; ZVFHMIN-ZFHIN-RV32-NEXT: or a1, a6, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: vmv.v.x v9, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a1, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 14(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: or a4, a5, a4 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a5, a6, a1 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a3, a7, a3 +; ZVFHMIN-ZFHIN-RV32-NEXT: or a3, a5, a3 +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a5, 12(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: lh a6, 28(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: vmv.v.x v9, a3 ; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v9, v9, a4 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a5, a5, a2 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a1, a1, a3 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a4, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 30(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: or a1, a5, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v9, v9, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a2, a4, a2 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a1, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a1, a1, a3 -; ZVFHMIN-ZFHIN-RV32-NEXT: or a1, a2, a1 +; 
ZVFHMIN-ZFHIN-RV32-NEXT: and a5, a5, a1 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a3, a6, a2 +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a4, 14(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: lh a6, 30(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: or a3, a5, a3 +; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v9, v9, a3 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a1, a4, a1 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a2, a6, a2 +; ZVFHMIN-ZFHIN-RV32-NEXT: or a1, a1, a2 ; ZVFHMIN-ZFHIN-RV32-NEXT: vmv.v.i v0, 15 ; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v9, v9, a1 ; ZVFHMIN-ZFHIN-RV32-NEXT: vslidedown.vi v9, v8, 4, v0.t @@ -2136,77 +1992,61 @@ define void @copysign_neg_v8f16(ptr %x, ptr %y) { ; ZVFHMIN-ZFHIN-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, mu ; ZVFHMIN-ZFHIN-RV64-NEXT: vle16.v v8, (a1) ; ZVFHMIN-ZFHIN-RV64-NEXT: vle16.v v9, (a0) -; ZVFHMIN-ZFHIN-RV64-NEXT: lui a1, 8 -; ZVFHMIN-ZFHIN-RV64-NEXT: vxor.vx v8, v8, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: mv a2, sp -; ZVFHMIN-ZFHIN-RV64-NEXT: vse16.v v9, (a2) -; ZVFHMIN-ZFHIN-RV64-NEXT: addi a2, sp, 16 -; ZVFHMIN-ZFHIN-RV64-NEXT: vse16.v v8, (a2) -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 2(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa4, 18(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a3, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 0(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a4, fa4 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa4, 16(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa3, 4(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a5, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 20(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a6, fa4 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a7, fa3 -; ZVFHMIN-ZFHIN-RV64-NEXT: addiw a2, a1, -1 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h t0, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 6(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: and t1, a3, a2 -; ZVFHMIN-ZFHIN-RV64-NEXT: lui a3, 1048568 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a4, a4, a3 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h t2, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 22(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: or a4, t1, a4 +; ZVFHMIN-ZFHIN-RV64-NEXT: lui a3, 8 +; ZVFHMIN-ZFHIN-RV64-NEXT: vxor.vx v8, v8, a3 +; ZVFHMIN-ZFHIN-RV64-NEXT: mv a1, sp +; ZVFHMIN-ZFHIN-RV64-NEXT: vse16.v v9, (a1) +; ZVFHMIN-ZFHIN-RV64-NEXT: addi a1, sp, 16 +; ZVFHMIN-ZFHIN-RV64-NEXT: vse16.v v8, (a1) +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a2, 2(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: addiw a1, a3, -1 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a4, a2, a1 +; ZVFHMIN-ZFHIN-RV64-NEXT: lh a5, 18(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: lui a2, 1048568 +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a6, 0(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a7, 16(sp) ; ZVFHMIN-ZFHIN-RV64-NEXT: and a5, a5, a2 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a6, a6, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h t1, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 10(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: or a4, a4, a5 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a5, a6, a1 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a6, a7, a3 ; ZVFHMIN-ZFHIN-RV64-NEXT: or a5, a5, a6 +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a6, 4(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: lh a7, 20(sp) ; ZVFHMIN-ZFHIN-RV64-NEXT: vmv.v.x v8, a5 ; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v8, v8, a4 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a4, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 26(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: and a4, a6, a1 ; ZVFHMIN-ZFHIN-RV64-NEXT: and a5, a7, a2 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a6, t0, a3 -; ZVFHMIN-ZFHIN-RV64-NEXT: or a5, a5, a6 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a6, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 8(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v8, v8, a5 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a5, t2, a2 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a7, t1, a3 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h t0, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 24(sp) -; 
ZVFHMIN-ZFHIN-RV64-NEXT: or a5, a5, a7 -; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v8, v8, a5 +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a6, 6(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: lh a7, 22(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: or a4, a4, a5 +; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v8, v8, a4 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a4, a6, a1 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a5, a7, a2 +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a6, 10(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: or a4, a4, a5 +; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v8, v8, a4 +; ZVFHMIN-ZFHIN-RV64-NEXT: lh a4, 26(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: and a5, a6, a1 +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a6, 8(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a7, 24(sp) ; ZVFHMIN-ZFHIN-RV64-NEXT: and a4, a4, a2 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a5, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 12(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: and a6, a6, a3 -; ZVFHMIN-ZFHIN-RV64-NEXT: or a4, a4, a6 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a1, a5, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a5, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 28(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: and a6, t0, a2 -; ZVFHMIN-ZFHIN-RV64-NEXT: or a1, a6, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: vmv.v.x v9, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a1, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 14(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: or a4, a5, a4 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a5, a6, a1 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a3, a7, a3 +; ZVFHMIN-ZFHIN-RV64-NEXT: or a3, a5, a3 +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a5, 12(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: lh a6, 28(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: vmv.v.x v9, a3 ; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v9, v9, a4 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a5, a5, a2 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a1, a1, a3 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a4, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 30(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: or a1, a5, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v9, v9, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a2, a4, a2 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a1, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a1, a1, a3 -; ZVFHMIN-ZFHIN-RV64-NEXT: or a1, a2, a1 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a5, a5, a1 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a3, a6, a2 +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a4, 14(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: lh a6, 30(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: or a3, a5, a3 +; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v9, v9, a3 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a1, a4, a1 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a2, a6, a2 +; ZVFHMIN-ZFHIN-RV64-NEXT: or a1, a1, a2 ; ZVFHMIN-ZFHIN-RV64-NEXT: vmv.v.i v0, 15 ; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v9, v9, a1 ; ZVFHMIN-ZFHIN-RV64-NEXT: vslidedown.vi v9, v8, 4, v0.t @@ -2360,78 +2200,62 @@ define void @copysign_neg_v6f16(ptr %x, ptr %y) { ; ZVFHMIN-ZFHIN-RV32-NEXT: vsetivli zero, 6, e16, mf2, ta, ma ; ZVFHMIN-ZFHIN-RV32-NEXT: vle16.v v8, (a1) ; ZVFHMIN-ZFHIN-RV32-NEXT: vle16.v v9, (a0) -; ZVFHMIN-ZFHIN-RV32-NEXT: lui a1, 8 +; ZVFHMIN-ZFHIN-RV32-NEXT: lui a3, 8 ; ZVFHMIN-ZFHIN-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma -; ZVFHMIN-ZFHIN-RV32-NEXT: vxor.vx v8, v8, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: mv a2, sp -; ZVFHMIN-ZFHIN-RV32-NEXT: vse16.v v9, (a2) -; ZVFHMIN-ZFHIN-RV32-NEXT: addi a2, sp, 16 -; ZVFHMIN-ZFHIN-RV32-NEXT: vse16.v v8, (a2) -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 2(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa4, 18(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a3, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 0(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a4, fa4 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa4, 16(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa3, 4(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a5, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 20(sp) -; 
ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a6, fa4 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a7, fa3 -; ZVFHMIN-ZFHIN-RV32-NEXT: addi a2, a1, -1 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h t0, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 6(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: and t1, a3, a2 -; ZVFHMIN-ZFHIN-RV32-NEXT: lui a3, 1048568 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a4, a4, a3 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h t2, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 22(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: or a4, t1, a4 +; ZVFHMIN-ZFHIN-RV32-NEXT: vxor.vx v8, v8, a3 +; ZVFHMIN-ZFHIN-RV32-NEXT: mv a1, sp +; ZVFHMIN-ZFHIN-RV32-NEXT: vse16.v v9, (a1) +; ZVFHMIN-ZFHIN-RV32-NEXT: addi a1, sp, 16 +; ZVFHMIN-ZFHIN-RV32-NEXT: vse16.v v8, (a1) +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a2, 2(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: addi a1, a3, -1 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a4, a2, a1 +; ZVFHMIN-ZFHIN-RV32-NEXT: lh a5, 18(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: lui a2, 1048568 +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a6, 0(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a7, 16(sp) ; ZVFHMIN-ZFHIN-RV32-NEXT: and a5, a5, a2 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a6, a6, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h t1, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 10(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: or a4, a4, a5 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a5, a6, a1 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a6, a7, a3 ; ZVFHMIN-ZFHIN-RV32-NEXT: or a5, a5, a6 +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a6, 4(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: lh a7, 20(sp) ; ZVFHMIN-ZFHIN-RV32-NEXT: vmv.v.x v8, a5 ; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a4 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a4, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 26(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: and a4, a6, a1 ; ZVFHMIN-ZFHIN-RV32-NEXT: and a5, a7, a2 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a6, t0, a3 -; ZVFHMIN-ZFHIN-RV32-NEXT: or a5, a5, a6 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a6, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 8(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a5 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a5, t2, a2 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a7, t1, a3 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h t0, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 24(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: or a5, a5, a7 -; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a5 +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a6, 6(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: lh a7, 22(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: or a4, a4, a5 +; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a4 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a4, a6, a1 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a5, a7, a2 +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a6, 10(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: or a4, a4, a5 +; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a4 +; ZVFHMIN-ZFHIN-RV32-NEXT: lh a4, 26(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: and a5, a6, a1 +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a6, 8(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a7, 24(sp) ; ZVFHMIN-ZFHIN-RV32-NEXT: and a4, a4, a2 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a5, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 12(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: and a6, a6, a3 -; ZVFHMIN-ZFHIN-RV32-NEXT: or a4, a4, a6 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a1, a5, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a5, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 28(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: and a6, t0, a2 -; ZVFHMIN-ZFHIN-RV32-NEXT: or a1, a6, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: vmv.v.x v9, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a1, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 14(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: or a4, a5, a4 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a5, a6, a1 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a3, a7, a3 +; ZVFHMIN-ZFHIN-RV32-NEXT: or a3, a5, a3 +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a5, 12(sp) +; 
ZVFHMIN-ZFHIN-RV32-NEXT: lh a6, 28(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: vmv.v.x v9, a3 ; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v9, v9, a4 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a5, a5, a2 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a1, a1, a3 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a4, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 30(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: or a1, a5, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v9, v9, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a2, a4, a2 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a1, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a1, a1, a3 -; ZVFHMIN-ZFHIN-RV32-NEXT: or a1, a2, a1 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a5, a5, a1 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a3, a6, a2 +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a4, 14(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: lh a6, 30(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: or a3, a5, a3 +; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v9, v9, a3 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a1, a4, a1 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a2, a6, a2 +; ZVFHMIN-ZFHIN-RV32-NEXT: or a1, a1, a2 ; ZVFHMIN-ZFHIN-RV32-NEXT: vmv.v.i v0, 15 ; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v9, v9, a1 ; ZVFHMIN-ZFHIN-RV32-NEXT: vsetivli zero, 6, e16, mf2, ta, mu @@ -2447,78 +2271,62 @@ define void @copysign_neg_v6f16(ptr %x, ptr %y) { ; ZVFHMIN-ZFHIN-RV64-NEXT: vsetivli zero, 6, e16, mf2, ta, ma ; ZVFHMIN-ZFHIN-RV64-NEXT: vle16.v v8, (a1) ; ZVFHMIN-ZFHIN-RV64-NEXT: vle16.v v9, (a0) -; ZVFHMIN-ZFHIN-RV64-NEXT: lui a1, 8 +; ZVFHMIN-ZFHIN-RV64-NEXT: lui a3, 8 ; ZVFHMIN-ZFHIN-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma -; ZVFHMIN-ZFHIN-RV64-NEXT: vxor.vx v8, v8, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: mv a2, sp -; ZVFHMIN-ZFHIN-RV64-NEXT: vse16.v v9, (a2) -; ZVFHMIN-ZFHIN-RV64-NEXT: addi a2, sp, 16 -; ZVFHMIN-ZFHIN-RV64-NEXT: vse16.v v8, (a2) -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 2(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa4, 18(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a3, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 0(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a4, fa4 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa4, 16(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa3, 4(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a5, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 20(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a6, fa4 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a7, fa3 -; ZVFHMIN-ZFHIN-RV64-NEXT: addiw a2, a1, -1 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h t0, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 6(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: and t1, a3, a2 -; ZVFHMIN-ZFHIN-RV64-NEXT: lui a3, 1048568 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a4, a4, a3 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h t2, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 22(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: or a4, t1, a4 +; ZVFHMIN-ZFHIN-RV64-NEXT: vxor.vx v8, v8, a3 +; ZVFHMIN-ZFHIN-RV64-NEXT: mv a1, sp +; ZVFHMIN-ZFHIN-RV64-NEXT: vse16.v v9, (a1) +; ZVFHMIN-ZFHIN-RV64-NEXT: addi a1, sp, 16 +; ZVFHMIN-ZFHIN-RV64-NEXT: vse16.v v8, (a1) +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a2, 2(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: addiw a1, a3, -1 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a4, a2, a1 +; ZVFHMIN-ZFHIN-RV64-NEXT: lh a5, 18(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: lui a2, 1048568 +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a6, 0(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a7, 16(sp) ; ZVFHMIN-ZFHIN-RV64-NEXT: and a5, a5, a2 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a6, a6, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h t1, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 10(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: or a4, a4, a5 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a5, a6, a1 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a6, a7, a3 ; ZVFHMIN-ZFHIN-RV64-NEXT: or a5, a5, a6 +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a6, 4(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: lh a7, 20(sp) ; 
ZVFHMIN-ZFHIN-RV64-NEXT: vmv.v.x v8, a5 ; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v8, v8, a4 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a4, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 26(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: and a4, a6, a1 ; ZVFHMIN-ZFHIN-RV64-NEXT: and a5, a7, a2 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a6, t0, a3 -; ZVFHMIN-ZFHIN-RV64-NEXT: or a5, a5, a6 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a6, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 8(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v8, v8, a5 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a5, t2, a2 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a7, t1, a3 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h t0, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 24(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: or a5, a5, a7 -; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v8, v8, a5 +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a6, 6(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: lh a7, 22(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: or a4, a4, a5 +; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v8, v8, a4 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a4, a6, a1 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a5, a7, a2 +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a6, 10(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: or a4, a4, a5 +; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v8, v8, a4 +; ZVFHMIN-ZFHIN-RV64-NEXT: lh a4, 26(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: and a5, a6, a1 +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a6, 8(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a7, 24(sp) ; ZVFHMIN-ZFHIN-RV64-NEXT: and a4, a4, a2 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a5, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 12(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: and a6, a6, a3 -; ZVFHMIN-ZFHIN-RV64-NEXT: or a4, a4, a6 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a1, a5, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a5, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 28(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: and a6, t0, a2 -; ZVFHMIN-ZFHIN-RV64-NEXT: or a1, a6, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: vmv.v.x v9, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a1, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 14(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: or a4, a5, a4 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a5, a6, a1 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a3, a7, a3 +; ZVFHMIN-ZFHIN-RV64-NEXT: or a3, a5, a3 +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a5, 12(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: lh a6, 28(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: vmv.v.x v9, a3 ; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v9, v9, a4 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a5, a5, a2 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a1, a1, a3 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a4, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 30(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: or a1, a5, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v9, v9, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a2, a4, a2 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a1, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a1, a1, a3 -; ZVFHMIN-ZFHIN-RV64-NEXT: or a1, a2, a1 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a5, a5, a1 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a3, a6, a2 +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a4, 14(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: lh a6, 30(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: or a3, a5, a3 +; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v9, v9, a3 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a1, a4, a1 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a2, a6, a2 +; ZVFHMIN-ZFHIN-RV64-NEXT: or a1, a1, a2 ; ZVFHMIN-ZFHIN-RV64-NEXT: vmv.v.i v0, 15 ; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v9, v9, a1 ; ZVFHMIN-ZFHIN-RV64-NEXT: vsetivli zero, 6, e16, mf2, ta, mu @@ -2678,38 +2486,30 @@ define void @copysign_neg_trunc_v4f16_v4f32(ptr %x, ptr %y) { ; ZVFHMIN-ZFHIN-RV32-NEXT: vxor.vx v8, v8, a1 ; ZVFHMIN-ZFHIN-RV32-NEXT: addi a2, sp, 8 ; ZVFHMIN-ZFHIN-RV32-NEXT: vse16.v v8, (a2) -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 2(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa4, 10(sp) -; 
ZVFHMIN-ZFHIN-RV32-NEXT: flh fa3, 0(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a2, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a3, fa4 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a4, fa3 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 8(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: addi a5, a1, -1 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a2, a2, a5 -; ZVFHMIN-ZFHIN-RV32-NEXT: lui a6, 1048568 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a7, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 4(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: and a3, a3, a6 -; ZVFHMIN-ZFHIN-RV32-NEXT: or a2, a2, a3 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a1, a7, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a3, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 12(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a2, 2(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: addi a3, a1, -1 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a2, a2, a3 +; ZVFHMIN-ZFHIN-RV32-NEXT: lh a4, 10(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: lui a5, 1048568 +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a6, 0(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a7, 8(sp) ; ZVFHMIN-ZFHIN-RV32-NEXT: and a4, a4, a5 +; ZVFHMIN-ZFHIN-RV32-NEXT: or a2, a2, a4 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a4, a6, a3 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a1, a7, a1 ; ZVFHMIN-ZFHIN-RV32-NEXT: or a1, a4, a1 +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a4, 4(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: lh a6, 12(sp) ; ZVFHMIN-ZFHIN-RV32-NEXT: vmv.v.x v8, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a1, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 6(sp) ; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a2 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a3, a3, a5 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a1, a1, a6 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a2, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 14(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: or a1, a3, a1 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a4, a4, a3 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a1, a6, a5 +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a2, 6(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: lh a6, 14(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: or a1, a4, a1 ; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a2, a2, a5 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a1, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a1, a1, a6 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a2, a2, a3 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a1, a6, a5 ; ZVFHMIN-ZFHIN-RV32-NEXT: or a1, a2, a1 ; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a1 ; ZVFHMIN-ZFHIN-RV32-NEXT: vse16.v v8, (a0) @@ -2730,38 +2530,30 @@ define void @copysign_neg_trunc_v4f16_v4f32(ptr %x, ptr %y) { ; ZVFHMIN-ZFHIN-RV64-NEXT: vxor.vx v8, v8, a1 ; ZVFHMIN-ZFHIN-RV64-NEXT: addi a2, sp, 8 ; ZVFHMIN-ZFHIN-RV64-NEXT: vse16.v v8, (a2) -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 2(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa4, 10(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa3, 0(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a2, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a3, fa4 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a4, fa3 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 8(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: addiw a5, a1, -1 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a2, a2, a5 -; ZVFHMIN-ZFHIN-RV64-NEXT: lui a6, 1048568 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a7, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 4(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: and a3, a3, a6 -; ZVFHMIN-ZFHIN-RV64-NEXT: or a2, a2, a3 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a1, a7, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a3, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 12(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a2, 2(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: addiw a3, a1, -1 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a2, a2, a3 +; ZVFHMIN-ZFHIN-RV64-NEXT: lh a4, 10(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: lui a5, 1048568 +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a6, 0(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a7, 8(sp) ; ZVFHMIN-ZFHIN-RV64-NEXT: and a4, 
a4, a5 +; ZVFHMIN-ZFHIN-RV64-NEXT: or a2, a2, a4 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a4, a6, a3 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a1, a7, a1 ; ZVFHMIN-ZFHIN-RV64-NEXT: or a1, a4, a1 +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a4, 4(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: lh a6, 12(sp) ; ZVFHMIN-ZFHIN-RV64-NEXT: vmv.v.x v8, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a1, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 6(sp) ; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v8, v8, a2 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a3, a3, a5 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a1, a1, a6 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a2, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 14(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: or a1, a3, a1 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a4, a4, a3 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a1, a6, a5 +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a2, 6(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: lh a6, 14(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: or a1, a4, a1 ; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v8, v8, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a2, a2, a5 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a1, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a1, a1, a6 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a2, a2, a3 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a1, a6, a5 ; ZVFHMIN-ZFHIN-RV64-NEXT: or a1, a2, a1 ; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v8, v8, a1 ; ZVFHMIN-ZFHIN-RV64-NEXT: vse16.v v8, (a0) @@ -2885,38 +2677,30 @@ define void @copysign_neg_trunc_v3f16_v3f32(ptr %x, ptr %y) { ; ZVFHMIN-ZFHIN-RV32-NEXT: vxor.vx v8, v8, a1 ; ZVFHMIN-ZFHIN-RV32-NEXT: addi a2, sp, 8 ; ZVFHMIN-ZFHIN-RV32-NEXT: vse16.v v8, (a2) -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 2(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa4, 10(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa3, 0(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a2, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a3, fa4 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a4, fa3 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 8(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: addi a5, a1, -1 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a2, a2, a5 -; ZVFHMIN-ZFHIN-RV32-NEXT: lui a6, 1048568 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a7, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 4(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: and a3, a3, a6 -; ZVFHMIN-ZFHIN-RV32-NEXT: or a2, a2, a3 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a1, a7, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a3, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 12(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a2, 2(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: addi a3, a1, -1 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a2, a2, a3 +; ZVFHMIN-ZFHIN-RV32-NEXT: lh a4, 10(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: lui a5, 1048568 +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a6, 0(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a7, 8(sp) ; ZVFHMIN-ZFHIN-RV32-NEXT: and a4, a4, a5 +; ZVFHMIN-ZFHIN-RV32-NEXT: or a2, a2, a4 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a4, a6, a3 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a1, a7, a1 ; ZVFHMIN-ZFHIN-RV32-NEXT: or a1, a4, a1 +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a4, 4(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: lh a6, 12(sp) ; ZVFHMIN-ZFHIN-RV32-NEXT: vmv.v.x v8, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a1, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 6(sp) ; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a2 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a3, a3, a5 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a1, a1, a6 -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a2, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 14(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: or a1, a3, a1 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a4, a4, a3 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a1, a6, a5 +; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a2, 6(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: lh a6, 14(sp) +; ZVFHMIN-ZFHIN-RV32-NEXT: or a1, a4, a1 ; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a2, a2, a5 -; 
ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a1, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a1, a1, a6 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a2, a2, a3 +; ZVFHMIN-ZFHIN-RV32-NEXT: and a1, a6, a5 ; ZVFHMIN-ZFHIN-RV32-NEXT: or a1, a2, a1 ; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a1 ; ZVFHMIN-ZFHIN-RV32-NEXT: vsetivli zero, 3, e16, mf4, ta, ma @@ -2939,38 +2723,30 @@ define void @copysign_neg_trunc_v3f16_v3f32(ptr %x, ptr %y) { ; ZVFHMIN-ZFHIN-RV64-NEXT: vxor.vx v8, v8, a1 ; ZVFHMIN-ZFHIN-RV64-NEXT: addi a2, sp, 8 ; ZVFHMIN-ZFHIN-RV64-NEXT: vse16.v v8, (a2) -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 2(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa4, 10(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa3, 0(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a2, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a3, fa4 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a4, fa3 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 8(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: addiw a5, a1, -1 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a2, a2, a5 -; ZVFHMIN-ZFHIN-RV64-NEXT: lui a6, 1048568 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a7, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 4(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: and a3, a3, a6 -; ZVFHMIN-ZFHIN-RV64-NEXT: or a2, a2, a3 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a1, a7, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a3, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 12(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a2, 2(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: addiw a3, a1, -1 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a2, a2, a3 +; ZVFHMIN-ZFHIN-RV64-NEXT: lh a4, 10(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: lui a5, 1048568 +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a6, 0(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a7, 8(sp) ; ZVFHMIN-ZFHIN-RV64-NEXT: and a4, a4, a5 +; ZVFHMIN-ZFHIN-RV64-NEXT: or a2, a2, a4 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a4, a6, a3 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a1, a7, a1 ; ZVFHMIN-ZFHIN-RV64-NEXT: or a1, a4, a1 +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a4, 4(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: lh a6, 12(sp) ; ZVFHMIN-ZFHIN-RV64-NEXT: vmv.v.x v8, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a1, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 6(sp) ; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v8, v8, a2 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a3, a3, a5 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a1, a1, a6 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a2, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 14(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: or a1, a3, a1 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a4, a4, a3 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a1, a6, a5 +; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a2, 6(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: lh a6, 14(sp) +; ZVFHMIN-ZFHIN-RV64-NEXT: or a1, a4, a1 ; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v8, v8, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a2, a2, a5 -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a1, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a1, a1, a6 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a2, a2, a3 +; ZVFHMIN-ZFHIN-RV64-NEXT: and a1, a6, a5 ; ZVFHMIN-ZFHIN-RV64-NEXT: or a1, a2, a1 ; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v8, v8, a1 ; ZVFHMIN-ZFHIN-RV64-NEXT: vsetivli zero, 3, e16, mf4, ta, ma From abe0dd195a3b2630afdc5c1c233eb2a068b2d72f Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Wed, 25 Sep 2024 10:32:40 -0700 Subject: [PATCH 200/212] [llvm-objdump] Print ... even if a data mapping symbol is active Swap `!DisassembleZeroes` and `if (DumpARMELFData)` conditions so that in the false DisassembleZeroes case (default), `...` will be printed for long consecutive zeroes, even when a data mapping symbol is active. This is especially useful for certain lld tests that insert a huge padding within a code section. Without `...` the output will be huge. 
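
For illustration only, a minimal self-contained sketch of the behavior this reordering makes unconditional (this is not the llvm-objdump source; `countSkippableZeroBytes` below is a simplified stand-in for the real helper of the same name, and its threshold and alignment rules are assumptions):

```cpp
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <vector>

// Simplified stand-in: length of the leading zero run, rounded down to a
// 4-byte boundary, and only reported when the run is long enough to be
// worth collapsing; returns 0 otherwise. (Threshold/alignment assumed.)
static size_t countSkippableZeroBytes(const uint8_t *Buf, size_t Size) {
  size_t N = 0;
  while (N < Size && Buf[N] == 0)
    ++N;
  N &= ~static_cast<size_t>(3);
  return N >= 16 ? N : 0;
}

int main() {
  std::vector<uint8_t> Bytes(64, 0);
  Bytes[40] = 0x1f; // pretend one real byte sits inside the padding
  size_t Index = 0;
  while (Index < Bytes.size()) {
    // The point of the patch: run this check before any data-dumping path
    // (the analogue of dumpARMELFData) gets a chance to claim the bytes.
    if (size_t N = countSkippableZeroBytes(Bytes.data() + Index,
                                           Bytes.size() - Index)) {
      std::puts("\t\t...");
      Index += N;
      continue;
    }
    std::printf("%4zx: %02x\n", Index, static_cast<unsigned>(Bytes[Index]));
    ++Index;
  }
}
```

With the check hoisted this way, long zero padding collapses to `...` by default even while a `$d` data mapping symbol is active, and passing `-z` (the `DisassembleZeroes` path) still restores the full byte-by-byte dump.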
Pull Request: https://github.com/llvm/llvm-project/pull/109553 --- lld/test/ELF/aarch64-undefined-weak.s | 2 +- llvm/test/MC/ARM/ltorg-range.s | 2 +- .../llvm-objdump/ELF/AArch64/zeroes.test | 66 +++++++++++++++++++ .../tools/llvm-objdump/ELF/ARM/zeroes.test | 47 +++++++++++++ llvm/tools/llvm-objdump/llvm-objdump.cpp | 35 +++++----- 5 files changed, 133 insertions(+), 19 deletions(-) create mode 100644 llvm/test/tools/llvm-objdump/ELF/AArch64/zeroes.test create mode 100644 llvm/test/tools/llvm-objdump/ELF/ARM/zeroes.test diff --git a/lld/test/ELF/aarch64-undefined-weak.s b/lld/test/ELF/aarch64-undefined-weak.s index f4628453ec3fe..015f9c9a043e5 100644 --- a/lld/test/ELF/aarch64-undefined-weak.s +++ b/lld/test/ELF/aarch64-undefined-weak.s @@ -1,7 +1,7 @@ // REQUIRES: aarch64 // RUN: llvm-mc -filetype=obj -triple=aarch64-none-linux %s -o %t.o // RUN: ld.lld --image-base=0x10000000 %t.o -o %t -// RUN: llvm-objdump -d --no-show-raw-insn %t | FileCheck %s +// RUN: llvm-objdump -d -z --no-show-raw-insn %t | FileCheck %s // Check that the ARM 64-bit ABI rules for undefined weak symbols are applied. // Branch instructions are resolved to the next instruction. Undefined diff --git a/llvm/test/MC/ARM/ltorg-range.s b/llvm/test/MC/ARM/ltorg-range.s index 5c27d4cd0df26..88b9bb3cb5be8 100644 --- a/llvm/test/MC/ARM/ltorg-range.s +++ b/llvm/test/MC/ARM/ltorg-range.s @@ -1,5 +1,5 @@ @ RUN: llvm-mc -triple armv7-unknown-linux-gnueabi -filetype obj -o - %s \ -@ RUN: | llvm-objdump -d - | FileCheck %s +@ RUN: | llvm-objdump -d -z - | FileCheck %s ldr r0, =0x01020304 @ CHECK: ldr diff --git a/llvm/test/tools/llvm-objdump/ELF/AArch64/zeroes.test b/llvm/test/tools/llvm-objdump/ELF/AArch64/zeroes.test new file mode 100644 index 0000000000000..a56d056f8a225 --- /dev/null +++ b/llvm/test/tools/llvm-objdump/ELF/AArch64/zeroes.test @@ -0,0 +1,66 @@ +## Test zero dumping when a data mapping symbol is active. +# RUN: llvm-mc -filetype=obj -triple=aarch64 %s -o %t +# RUN: llvm-objdump -t -d %t | FileCheck %s + +# CHECK: SYMBOL TABLE: +# CHECK-NEXT: 0000000000000000 l .text 0000000000000000 $d +# CHECK-NEXT: 000000000000000c l .text 0000000000000000 $x +# CHECK-NEXT: 0000000000000010 l .text 0000000000000000 $d + +# CHECK: 0000000000000000 <_start>: +# CHECK-NEXT: ... +# CHECK-NEXT: 8: 01 00 00 00 .word 0x00000001 +# CHECK-NEXT: c: d503201f nop +# CHECK-NEXT: ... +# CHECK-NEXT: 18: d503201f nop +# CHECK-NEXT: ... +# CHECK-NEXT: 2c: d503201f nop +# CHECK-NEXT: ... +# CHECK-NEXT: 48: d503201f nop + +# RUN: llvm-objdump -d -z %t | FileCheck %s --check-prefix=ZERO + +# ZERO: 0000000000000000 <_start>: +# ZERO-NEXT: 0: 00 00 00 00 .word 0x00000000 +# ZERO-NEXT: 4: 00 00 00 00 .word 0x00000000 +# ZERO-NEXT: 8: 01 00 00 00 .word 0x00000001 +# ZERO-NEXT: c: d503201f nop +# ZERO-NEXT: 10: 00 00 00 00 .word 0x00000000 +# ZERO-NEXT: 14: 00 00 00 00 .word 0x00000000 +# ZERO-NEXT: 18: d503201f nop + +## Check we do not skip zeroes blocks if have relocations pointed to these places. +# RUN: llvm-objdump -d -r %t | FileCheck %s --check-prefix=RELOC + +# RELOC: 0000000000000000 <_start>: +# RELOC-NEXT: ... +# RELOC-NEXT: 8: 01 00 00 00 .word 0x00000001 +# RELOC-NEXT: c: d503201f nop +# RELOC-NEXT: ... +# RELOC-NEXT: 18: d503201f nop +# RELOC-NEXT: 1c: 00 00 00 00 .word 0x00000000 +# RELOC-NEXT: 000000000000001c: R_AARCH64_ABS64 x1 +# RELOC-NEXT: ... +# RELOC-NEXT: 2c: d503201f nop +# RELOC-NEXT: ... +# RELOC-NEXT: 38: 00 00 00 00 .word 0x00000000 +# RELOC-NEXT: 0000000000000038: R_AARCH64_ABS64 x2 +# RELOC-NEXT: ... 
+# RELOC-NEXT: 48: d503201f nop + +.globl _start +_start: + .space 8 + .long 1 + nop + .space 8 + nop + + .quad x1 + .space 8 + nop + + .space 8 + .quad x2 + .space 8 + nop diff --git a/llvm/test/tools/llvm-objdump/ELF/ARM/zeroes.test b/llvm/test/tools/llvm-objdump/ELF/ARM/zeroes.test new file mode 100644 index 0000000000000..8601343bd146e --- /dev/null +++ b/llvm/test/tools/llvm-objdump/ELF/ARM/zeroes.test @@ -0,0 +1,47 @@ +## Test zero dumping when a data mapping symbol is active. +# RUN: llvm-mc -filetype=obj -triple=armv7 %s -o %t +# RUN: llvm-objdump -t -d %t | FileCheck %s + +# CHECK: SYMBOL TABLE: +# CHECK-NEXT: 00000000 l .text 00000000 $d +# CHECK-NEXT: 0000000c l .text 00000000 $a +# CHECK-NEXT: 00000010 l .text 00000000 $d + +# CHECK: 00000000 <_start>: +# CHECK-NEXT: ... +# CHECK-NEXT: 8: 01 00 00 00 .word 0x00000001 +# CHECK-NEXT: c: e320f000 +# CHECK-NEXT: ... +# CHECK-NEXT: 18: e320f000 +# CHECK-NEXT: ... +# CHECK-NEXT: 28: e320f000 +# CHECK-NEXT: ... +# CHECK-NEXT: 40: e320f000 + +# RUN: llvm-objdump -d -z --triple=armv7 %t | FileCheck %s --check-prefix=ZERO + +# ZERO: 00000000 <_start>: +# ZERO-NEXT: 0: 00 00 00 00 .word 0x00000000 +# ZERO-NEXT: 4: 00 00 00 00 .word 0x00000000 +# ZERO-NEXT: 8: 01 00 00 00 .word 0x00000001 +# ZERO-NEXT: c: e320f000 nop +# ZERO-NEXT: 10: 00 00 00 00 .word 0x00000000 +# ZERO-NEXT: 14: 00 00 00 00 .word 0x00000000 +# ZERO-NEXT: 18: e320f000 nop + +.globl _start +_start: + .space 8 + .long 1 + nop + .space 8 + nop + + .long x1 + .space 8 + nop + + .space 8 + .long x2 + .space 8 + nop diff --git a/llvm/tools/llvm-objdump/llvm-objdump.cpp b/llvm/tools/llvm-objdump/llvm-objdump.cpp index b69d14b4e7609..8073c898b8a14 100644 --- a/llvm/tools/llvm-objdump/llvm-objdump.cpp +++ b/llvm/tools/llvm-objdump/llvm-objdump.cpp @@ -2244,27 +2244,28 @@ disassembleObject(ObjectFile &Obj, const ObjectFile &DbgObj, return false; }; + // When -z or --disassemble-zeroes are given we always dissasemble + // them. Otherwise we might want to skip zero bytes we see. + if (!DisassembleZeroes) { + uint64_t MaxOffset = End - Index; + // For --reloc: print zero blocks patched by relocations, so that + // relocations can be shown in the dump. + if (InlineRelocs && RelCur != RelEnd) + MaxOffset = std::min(RelCur->getOffset() - RelAdjustment - Index, + MaxOffset); + + if (size_t N = + countSkippableZeroBytes(Bytes.slice(Index, MaxOffset))) { + FOS << "\t\t..." << '\n'; + Index += N; + continue; + } + } + if (DumpARMELFData) { Size = dumpARMELFData(SectionAddr, Index, End, Obj, Bytes, MappingSymbols, *DT->SubtargetInfo, FOS); } else { - // When -z or --disassemble-zeroes are given we always dissasemble - // them. Otherwise we might want to skip zero bytes we see. - if (!DisassembleZeroes) { - uint64_t MaxOffset = End - Index; - // For --reloc: print zero blocks patched by relocations, so that - // relocations can be shown in the dump. - if (InlineRelocs && RelCur != RelEnd) - MaxOffset = std::min(RelCur->getOffset() - RelAdjustment - Index, - MaxOffset); - - if (size_t N = - countSkippableZeroBytes(Bytes.slice(Index, MaxOffset))) { - FOS << "\t\t..." 
<< '\n'; - Index += N; - continue; - } - } if (DumpTracebackTableForXCOFFFunction && doesXCOFFTracebackTableBegin(Bytes.slice(Index, 4))) { From b1aea98cfa357e23f4bb52232da5f41781f23bff Mon Sep 17 00:00:00 2001 From: Jan Svoboda Date: Wed, 25 Sep 2024 10:36:44 -0700 Subject: [PATCH 201/212] [clang] Make deprecations of some `FileManager` APIs formal (#110014) Some `FileManager` APIs still return `{File,Directory}Entry` instead of the preferred `{File,Directory}EntryRef`. These are documented to be deprecated, but don't have the attribute that warns on their usage. This PR marks them as such with `LLVM_DEPRECATED()` and replaces their usage with the recommended counterparts. NFCI. --- .../clang-move/tool/ClangMove.cpp | 2 +- clang-tools-extra/clangd/SourceCode.cpp | 4 +- .../clangd/unittests/ParsedASTTests.cpp | 4 +- .../unittests/FindHeadersTest.cpp | 2 +- .../include-cleaner/unittests/RecordTest.cpp | 84 +++++++++---------- .../include/common/VirtualFileHelper.h | 2 +- .../clang/Basic/DiagnosticFrontendKinds.td | 2 - clang/include/clang/Basic/FileManager.h | 8 +- clang/lib/AST/ASTImporter.cpp | 4 +- clang/lib/CodeGen/CodeGenAction.cpp | 4 +- clang/lib/ExtractAPI/ExtractAPIConsumer.cpp | 4 +- clang/lib/Frontend/ASTUnit.cpp | 2 +- clang/lib/Frontend/CompilerInstance.cpp | 10 +-- .../lib/Frontend/Rewrite/FrontendActions.cpp | 2 +- clang/lib/InstallAPI/Frontend.cpp | 2 +- clang/lib/Lex/HeaderSearch.cpp | 8 +- clang/lib/Lex/ModuleMap.cpp | 3 +- clang/lib/Lex/PPLexerChange.cpp | 2 +- clang/lib/Serialization/ASTReader.cpp | 6 +- clang/lib/Serialization/ModuleManager.cpp | 12 +-- clang/lib/Tooling/Core/Replacement.cpp | 2 +- .../DependencyScanning/ModuleDepCollector.cpp | 8 +- clang/tools/clang-installapi/Options.cpp | 2 +- clang/tools/clang-refactor/ClangRefactor.cpp | 2 +- clang/tools/clang-refactor/TestSupport.cpp | 2 +- clang/unittests/Basic/FileManagerTest.cpp | 77 +++++++++-------- clang/unittests/Basic/SourceManagerTest.cpp | 2 +- .../Frontend/CompilerInstanceTest.cpp | 2 +- 28 files changed, 135 insertions(+), 129 deletions(-) diff --git a/clang-tools-extra/clang-move/tool/ClangMove.cpp b/clang-tools-extra/clang-move/tool/ClangMove.cpp index 1560dcaad6779..655ea81ee37d4 100644 --- a/clang-tools-extra/clang-move/tool/ClangMove.cpp +++ b/clang-tools-extra/clang-move/tool/ClangMove.cpp @@ -199,7 +199,7 @@ int main(int argc, const char **argv) { for (auto I = Files.begin(), E = Files.end(); I != E; ++I) { OS << " {\n"; OS << " \"FilePath\": \"" << *I << "\",\n"; - const auto Entry = FileMgr.getFile(*I); + const auto Entry = FileMgr.getOptionalFileRef(*I); auto ID = SM.translateFile(*Entry); std::string Content; llvm::raw_string_ostream ContentStream(Content); diff --git a/clang-tools-extra/clangd/SourceCode.cpp b/clang-tools-extra/clangd/SourceCode.cpp index 3af99b9db056d..780aaa471dc8b 100644 --- a/clang-tools-extra/clangd/SourceCode.cpp +++ b/clang-tools-extra/clangd/SourceCode.cpp @@ -814,8 +814,8 @@ llvm::SmallVector ancestorNamespaces(llvm::StringRef NS) { // Checks whether \p FileName is a valid spelling of main file. 
bool isMainFile(llvm::StringRef FileName, const SourceManager &SM) { - auto FE = SM.getFileManager().getFile(FileName); - return FE && *FE == SM.getFileEntryForID(SM.getMainFileID()); + auto FE = SM.getFileManager().getOptionalFileRef(FileName); + return FE && FE == SM.getFileEntryRefForID(SM.getMainFileID()); } } // namespace diff --git a/clang-tools-extra/clangd/unittests/ParsedASTTests.cpp b/clang-tools-extra/clangd/unittests/ParsedASTTests.cpp index 4bb76cd6ab830..6ee641caeefe3 100644 --- a/clang-tools-extra/clangd/unittests/ParsedASTTests.cpp +++ b/clang-tools-extra/clangd/unittests/ParsedASTTests.cpp @@ -397,10 +397,10 @@ TEST(ParsedASTTest, PatchesAdditionalIncludes) { auto &FM = SM.getFileManager(); // Copy so that we can use operator[] to get the children. IncludeStructure Includes = PatchedAST->getIncludeStructure(); - auto MainFE = FM.getFile(testPath("foo.cpp")); + auto MainFE = FM.getOptionalFileRef(testPath("foo.cpp")); ASSERT_TRUE(MainFE); auto MainID = Includes.getID(*MainFE); - auto AuxFE = FM.getFile(testPath("sub/aux.h")); + auto AuxFE = FM.getOptionalFileRef(testPath("sub/aux.h")); ASSERT_TRUE(AuxFE); auto AuxID = Includes.getID(*AuxFE); EXPECT_THAT(Includes.IncludeChildren[*MainID], Contains(*AuxID)); diff --git a/clang-tools-extra/include-cleaner/unittests/FindHeadersTest.cpp b/clang-tools-extra/include-cleaner/unittests/FindHeadersTest.cpp index c5fc465ced7a7..84e02e1d0d621 100644 --- a/clang-tools-extra/include-cleaner/unittests/FindHeadersTest.cpp +++ b/clang-tools-extra/include-cleaner/unittests/FindHeadersTest.cpp @@ -60,7 +60,7 @@ class FindHeadersTest : public testing::Test { llvm::SmallVector> findHeaders(llvm::StringRef FileName) { return include_cleaner::findHeaders( AST->sourceManager().translateFileLineCol( - AST->fileManager().getFile(FileName).get(), + *AST->fileManager().getOptionalFileRef(FileName), /*Line=*/1, /*Col=*/1), AST->sourceManager(), &PI); } diff --git a/clang-tools-extra/include-cleaner/unittests/RecordTest.cpp b/clang-tools-extra/include-cleaner/unittests/RecordTest.cpp index 0b05c9190cb67..b5a7b9720903e 100644 --- a/clang-tools-extra/include-cleaner/unittests/RecordTest.cpp +++ b/clang-tools-extra/include-cleaner/unittests/RecordTest.cpp @@ -234,7 +234,7 @@ TEST_F(RecordPPTest, CapturesMacroRefs) { const auto &SM = AST.sourceManager(); SourceLocation Def = SM.getComposedLoc( - SM.translateFile(AST.fileManager().getFile("header.h").get()), + SM.translateFile(*AST.fileManager().getOptionalFileRef("header.h")), Header.point("def")); ASSERT_THAT(Recorded.MacroReferences, Not(IsEmpty())); Symbol OrigX = Recorded.MacroReferences.front().Target; @@ -368,29 +368,29 @@ TEST_F(PragmaIncludeTest, IWYUKeep) { TestAST Processed = build(); auto &FM = Processed.fileManager(); - EXPECT_FALSE(PI.shouldKeep(FM.getFile("normal.h").get())); - EXPECT_FALSE(PI.shouldKeep(FM.getFile("std/vector").get())); + EXPECT_FALSE(PI.shouldKeep(*FM.getOptionalFileRef("normal.h"))); + EXPECT_FALSE(PI.shouldKeep(*FM.getOptionalFileRef("std/vector"))); // Keep - EXPECT_TRUE(PI.shouldKeep(FM.getFile("keep1.h").get())); - EXPECT_TRUE(PI.shouldKeep(FM.getFile("keep2.h").get())); - EXPECT_TRUE(PI.shouldKeep(FM.getFile("keep3.h").get())); - EXPECT_TRUE(PI.shouldKeep(FM.getFile("keep4.h").get())); - EXPECT_TRUE(PI.shouldKeep(FM.getFile("keep5.h").get())); - EXPECT_TRUE(PI.shouldKeep(FM.getFile("keep6.h").get())); - EXPECT_TRUE(PI.shouldKeep(FM.getFile("std/map").get())); + EXPECT_TRUE(PI.shouldKeep(*FM.getOptionalFileRef("keep1.h"))); + 
EXPECT_TRUE(PI.shouldKeep(*FM.getOptionalFileRef("keep2.h"))); + EXPECT_TRUE(PI.shouldKeep(*FM.getOptionalFileRef("keep3.h"))); + EXPECT_TRUE(PI.shouldKeep(*FM.getOptionalFileRef("keep4.h"))); + EXPECT_TRUE(PI.shouldKeep(*FM.getOptionalFileRef("keep5.h"))); + EXPECT_TRUE(PI.shouldKeep(*FM.getOptionalFileRef("keep6.h"))); + EXPECT_TRUE(PI.shouldKeep(*FM.getOptionalFileRef("std/map"))); // Exports - EXPECT_TRUE(PI.shouldKeep(FM.getFile("export1.h").get())); - EXPECT_TRUE(PI.shouldKeep(FM.getFile("export2.h").get())); - EXPECT_TRUE(PI.shouldKeep(FM.getFile("export3.h").get())); - EXPECT_TRUE(PI.shouldKeep(FM.getFile("std/set").get())); + EXPECT_TRUE(PI.shouldKeep(*FM.getOptionalFileRef("export1.h"))); + EXPECT_TRUE(PI.shouldKeep(*FM.getOptionalFileRef("export2.h"))); + EXPECT_TRUE(PI.shouldKeep(*FM.getOptionalFileRef("export3.h"))); + EXPECT_TRUE(PI.shouldKeep(*FM.getOptionalFileRef("std/set"))); } TEST_F(PragmaIncludeTest, AssociatedHeader) { createEmptyFiles({"foo/main.h", "bar/main.h", "bar/other.h", "std/vector"}); auto IsKeep = [&](llvm::StringRef Name, TestAST &AST) { - return PI.shouldKeep(AST.fileManager().getFile(Name).get()); + return PI.shouldKeep(*AST.fileManager().getOptionalFileRef(Name)); }; Inputs.FileName = "main.cc"; @@ -452,19 +452,19 @@ TEST_F(PragmaIncludeTest, IWYUPrivate) { // IWYU pragma: private )cpp"; TestAST Processed = build(); - auto PrivateFE = Processed.fileManager().getFile("private.h"); + auto PrivateFE = Processed.fileManager().getOptionalFileRef("private.h"); assert(PrivateFE); - EXPECT_TRUE(PI.isPrivate(PrivateFE.get())); - EXPECT_EQ(PI.getPublic(PrivateFE.get()), "\"public2.h\""); + EXPECT_TRUE(PI.isPrivate(*PrivateFE)); + EXPECT_EQ(PI.getPublic(*PrivateFE), "\"public2.h\""); - auto PublicFE = Processed.fileManager().getFile("public.h"); + auto PublicFE = Processed.fileManager().getOptionalFileRef("public.h"); assert(PublicFE); - EXPECT_EQ(PI.getPublic(PublicFE.get()), ""); // no mapping. - EXPECT_FALSE(PI.isPrivate(PublicFE.get())); + EXPECT_EQ(PI.getPublic(*PublicFE), ""); // no mapping. 
+ EXPECT_FALSE(PI.isPrivate(*PublicFE)); - auto Private2FE = Processed.fileManager().getFile("private2.h"); + auto Private2FE = Processed.fileManager().getOptionalFileRef("private2.h"); assert(Private2FE); - EXPECT_TRUE(PI.isPrivate(Private2FE.get())); + EXPECT_TRUE(PI.isPrivate(*Private2FE)); } TEST_F(PragmaIncludeTest, IWYUExport) { @@ -486,13 +486,13 @@ TEST_F(PragmaIncludeTest, IWYUExport) { const auto &SM = Processed.sourceManager(); auto &FM = Processed.fileManager(); - EXPECT_THAT(PI.getExporters(FM.getFile("private.h").get(), FM), + EXPECT_THAT(PI.getExporters(*FM.getOptionalFileRef("private.h"), FM), testing::UnorderedElementsAre(FileNamed("export1.h"), FileNamed("export3.h"))); - EXPECT_TRUE(PI.getExporters(FM.getFile("export1.h").get(), FM).empty()); - EXPECT_TRUE(PI.getExporters(FM.getFile("export2.h").get(), FM).empty()); - EXPECT_TRUE(PI.getExporters(FM.getFile("export3.h").get(), FM).empty()); + EXPECT_TRUE(PI.getExporters(*FM.getOptionalFileRef("export1.h"), FM).empty()); + EXPECT_TRUE(PI.getExporters(*FM.getOptionalFileRef("export2.h"), FM).empty()); + EXPECT_TRUE(PI.getExporters(*FM.getOptionalFileRef("export3.h"), FM).empty()); EXPECT_TRUE( PI.getExporters(SM.getFileEntryForID(SM.getMainFileID()), FM).empty()); } @@ -548,23 +548,23 @@ TEST_F(PragmaIncludeTest, IWYUExportBlock) { } return Result; }; - auto Exporters = PI.getExporters(FM.getFile("private1.h").get(), FM); + auto Exporters = PI.getExporters(*FM.getOptionalFileRef("private1.h"), FM); EXPECT_THAT(Exporters, testing::UnorderedElementsAre(FileNamed("export1.h"), FileNamed("normal.h"))) << GetNames(Exporters); - Exporters = PI.getExporters(FM.getFile("private2.h").get(), FM); + Exporters = PI.getExporters(*FM.getOptionalFileRef("private2.h"), FM); EXPECT_THAT(Exporters, testing::UnorderedElementsAre(FileNamed("export1.h"))) << GetNames(Exporters); - Exporters = PI.getExporters(FM.getFile("private3.h").get(), FM); + Exporters = PI.getExporters(*FM.getOptionalFileRef("private3.h"), FM); EXPECT_THAT(Exporters, testing::UnorderedElementsAre(FileNamed("export1.h"))) << GetNames(Exporters); - Exporters = PI.getExporters(FM.getFile("foo.h").get(), FM); + Exporters = PI.getExporters(*FM.getOptionalFileRef("foo.h"), FM); EXPECT_TRUE(Exporters.empty()) << GetNames(Exporters); - Exporters = PI.getExporters(FM.getFile("bar.h").get(), FM); + Exporters = PI.getExporters(*FM.getOptionalFileRef("bar.h"), FM); EXPECT_TRUE(Exporters.empty()) << GetNames(Exporters); } @@ -580,8 +580,8 @@ TEST_F(PragmaIncludeTest, SelfContained) { Inputs.ExtraFiles["unguarded.h"] = ""; TestAST Processed = build(); auto &FM = Processed.fileManager(); - EXPECT_TRUE(PI.isSelfContained(FM.getFile("guarded.h").get())); - EXPECT_FALSE(PI.isSelfContained(FM.getFile("unguarded.h").get())); + EXPECT_TRUE(PI.isSelfContained(*FM.getOptionalFileRef("guarded.h"))); + EXPECT_FALSE(PI.isSelfContained(*FM.getOptionalFileRef("unguarded.h"))); } TEST_F(PragmaIncludeTest, AlwaysKeep) { @@ -596,8 +596,8 @@ TEST_F(PragmaIncludeTest, AlwaysKeep) { Inputs.ExtraFiles["usual.h"] = "#pragma once"; TestAST Processed = build(); auto &FM = Processed.fileManager(); - EXPECT_TRUE(PI.shouldKeep(FM.getFile("always_keep.h").get())); - EXPECT_FALSE(PI.shouldKeep(FM.getFile("usual.h").get())); + EXPECT_TRUE(PI.shouldKeep(*FM.getOptionalFileRef("always_keep.h"))); + EXPECT_FALSE(PI.shouldKeep(*FM.getOptionalFileRef("usual.h"))); } TEST_F(PragmaIncludeTest, ExportInUnnamedBuffer) { @@ -653,13 +653,13 @@ TEST_F(PragmaIncludeTest, OutlivesFMAndSM) { // Now this build gives us a new 
File&Source Manager. TestAST Processed = build(/*ResetPragmaIncludes=*/false); auto &FM = Processed.fileManager(); - auto PrivateFE = FM.getFile("private.h"); + auto PrivateFE = FM.getOptionalFileRef("private.h"); assert(PrivateFE); - EXPECT_EQ(PI.getPublic(PrivateFE.get()), "\"public.h\""); + EXPECT_EQ(PI.getPublic(*PrivateFE), "\"public.h\""); - auto Private2FE = FM.getFile("private2.h"); + auto Private2FE = FM.getOptionalFileRef("private2.h"); assert(Private2FE); - EXPECT_THAT(PI.getExporters(Private2FE.get(), FM), + EXPECT_THAT(PI.getExporters(*Private2FE, FM), testing::ElementsAre(llvm::cantFail(FM.getFileRef("public.h")))); } @@ -676,8 +676,8 @@ TEST_F(PragmaIncludeTest, CanRecordManyTimes) { TestAST Processed = build(); auto &FM = Processed.fileManager(); - auto PrivateFE = FM.getFile("private.h"); - llvm::StringRef Public = PI.getPublic(PrivateFE.get()); + auto PrivateFE = FM.getOptionalFileRef("private.h"); + llvm::StringRef Public = PI.getPublic(*PrivateFE); EXPECT_EQ(Public, "\"public.h\""); // This build populates same PI during build, but this time we don't have diff --git a/clang-tools-extra/unittests/include/common/VirtualFileHelper.h b/clang-tools-extra/unittests/include/common/VirtualFileHelper.h index 18b98d2796e67..abe1067495694 100644 --- a/clang-tools-extra/unittests/include/common/VirtualFileHelper.h +++ b/clang-tools-extra/unittests/include/common/VirtualFileHelper.h @@ -60,7 +60,7 @@ class VirtualFileHelper { I != E; ++I) { std::unique_ptr Buf = llvm::MemoryBuffer::getMemBuffer(I->Code); - const FileEntry *Entry = SM.getFileManager().getVirtualFile( + FileEntryRef Entry = SM.getFileManager().getVirtualFileRef( I->FileName, Buf->getBufferSize(), /*ModificationTime=*/0); SM.overrideFileContents(Entry, std::move(Buf)); } diff --git a/clang/include/clang/Basic/DiagnosticFrontendKinds.td b/clang/include/clang/Basic/DiagnosticFrontendKinds.td index 292e4af1b3b30..a6b17ccb6799d 100644 --- a/clang/include/clang/Basic/DiagnosticFrontendKinds.td +++ b/clang/include/clang/Basic/DiagnosticFrontendKinds.td @@ -109,8 +109,6 @@ def err_fe_expected_clang_command : Error< "expected a clang compiler command">; def err_fe_remap_missing_to_file : Error< "could not remap file '%0' to the contents of file '%1'">, DefaultFatal; -def err_fe_remap_missing_from_file : Error< - "could not remap from missing file '%0'">, DefaultFatal; def err_fe_unable_to_load_pch : Error< "unable to load PCH file">; def err_fe_unable_to_load_plugin : Error< diff --git a/clang/include/clang/Basic/FileManager.h b/clang/include/clang/Basic/FileManager.h index 74029a91d1a6d..ce4e8c1fbe16e 100644 --- a/clang/include/clang/Basic/FileManager.h +++ b/clang/include/clang/Basic/FileManager.h @@ -84,7 +84,7 @@ class FileManager : public RefCountedBase { /// VirtualDirectoryEntries/VirtualFileEntries above. /// llvm::StringMap, llvm::BumpPtrAllocator> - SeenDirEntries; + SeenDirEntries; /// A cache that maps paths to file entries (either real or /// virtual) we have looked up, or an error that occurred when we looked up @@ -190,6 +190,8 @@ class FileManager : public RefCountedBase { /// /// \param CacheFailure If true and the file does not exist, we'll cache /// the failure to find this file. 
+ LLVM_DEPRECATED("Functions returning DirectoryEntry are deprecated.", + "getOptionalDirectoryRef()") llvm::ErrorOr getDirectory(StringRef DirName, bool CacheFailure = true); @@ -207,6 +209,8 @@ class FileManager : public RefCountedBase { /// /// \param CacheFailure If true and the file does not exist, we'll cache /// the failure to find this file. + LLVM_DEPRECATED("Functions returning FileEntry are deprecated.", + "getOptionalFileRef()") llvm::ErrorOr getFile(StringRef Filename, bool OpenFile = false, bool CacheFailure = true); @@ -269,6 +273,8 @@ class FileManager : public RefCountedBase { FileEntryRef getVirtualFileRef(StringRef Filename, off_t Size, time_t ModificationTime); + LLVM_DEPRECATED("Functions returning FileEntry are deprecated.", + "getVirtualFileRef()") const FileEntry *getVirtualFile(StringRef Filename, off_t Size, time_t ModificationTime); diff --git a/clang/lib/AST/ASTImporter.cpp b/clang/lib/AST/ASTImporter.cpp index bba97e289da2e..60175f1ccb342 100644 --- a/clang/lib/AST/ASTImporter.cpp +++ b/clang/lib/AST/ASTImporter.cpp @@ -10020,8 +10020,8 @@ Expected ASTImporter::Import(FileID FromID, bool IsBuiltin) { ToIncludeLocOrFakeLoc = ToSM.getLocForStartOfFile(ToSM.getMainFileID()); if (Cache->OrigEntry && Cache->OrigEntry->getDir()) { - // FIXME: We probably want to use getVirtualFile(), so we don't hit the - // disk again + // FIXME: We probably want to use getVirtualFileRef(), so we don't hit + // the disk again // FIXME: We definitely want to re-use the existing MemoryBuffer, rather // than mmap the files several times. auto Entry = diff --git a/clang/lib/CodeGen/CodeGenAction.cpp b/clang/lib/CodeGen/CodeGenAction.cpp index 883333f0924dd..c9f9b688d0d8a 100644 --- a/clang/lib/CodeGen/CodeGenAction.cpp +++ b/clang/lib/CodeGen/CodeGenAction.cpp @@ -586,9 +586,9 @@ const FullSourceLoc BackendConsumer::getBestLocationFromDebugLoc( if (D.isLocationAvailable()) { D.getLocation(Filename, Line, Column); if (Line > 0) { - auto FE = FileMgr.getFile(Filename); + auto FE = FileMgr.getOptionalFileRef(Filename); if (!FE) - FE = FileMgr.getFile(D.getAbsolutePath()); + FE = FileMgr.getOptionalFileRef(D.getAbsolutePath()); if (FE) { // If -gcolumn-info was not used, Column will be 0. This upsets the // source manager, so pass 1 if Column is not set. diff --git a/clang/lib/ExtractAPI/ExtractAPIConsumer.cpp b/clang/lib/ExtractAPI/ExtractAPIConsumer.cpp index 75c2dec22400b..6f42b36bd36a4 100644 --- a/clang/lib/ExtractAPI/ExtractAPIConsumer.cpp +++ b/clang/lib/ExtractAPI/ExtractAPIConsumer.cpp @@ -217,8 +217,8 @@ struct LocationFileChecker { SmallVector, bool>> &KnownFiles) : CI(CI), KnownFiles(KnownFiles), ExternalFileEntries() { for (const auto &KnownFile : KnownFiles) - if (auto FileEntry = CI.getFileManager().getFile(KnownFile.first)) - KnownFileEntries.insert(*FileEntry); + if (auto FE = CI.getFileManager().getOptionalFileRef(KnownFile.first)) + KnownFileEntries.insert(*FE); } private: diff --git a/clang/lib/Frontend/ASTUnit.cpp b/clang/lib/Frontend/ASTUnit.cpp index 93836ec5402fa..bffff0d27af3a 100644 --- a/clang/lib/Frontend/ASTUnit.cpp +++ b/clang/lib/Frontend/ASTUnit.cpp @@ -2395,7 +2395,7 @@ void ASTUnit::TranslateStoredDiagnostics( // Rebuild the StoredDiagnostic. 
if (SD.Filename.empty()) continue; - auto FE = FileMgr.getFile(SD.Filename); + auto FE = FileMgr.getOptionalFileRef(SD.Filename); if (!FE) continue; SourceLocation FileLoc; diff --git a/clang/lib/Frontend/CompilerInstance.cpp b/clang/lib/Frontend/CompilerInstance.cpp index 5f2a9637e3ea4..240305b33824b 100644 --- a/clang/lib/Frontend/CompilerInstance.cpp +++ b/clang/lib/Frontend/CompilerInstance.cpp @@ -427,12 +427,8 @@ static void InitializeFileRemapping(DiagnosticsEngine &Diags, } // Create the file entry for the file that we're mapping from. - const FileEntry *FromFile = - FileMgr.getVirtualFile(RF.first, ToFile->getSize(), 0); - if (!FromFile) { - Diags.Report(diag::err_fe_remap_missing_from_file) << RF.first; - continue; - } + FileEntryRef FromFile = + FileMgr.getVirtualFileRef(RF.first, ToFile->getSize(), 0); // Override the contents of the "from" file with the contents of // the "to" file. @@ -1926,7 +1922,7 @@ ModuleLoadResult CompilerInstance::findOrCompileModuleAndReadAST( // Check whether M refers to the file in the prebuilt module path. if (M && M->getASTFile()) - if (auto ModuleFile = FileMgr->getFile(ModuleFilename)) + if (auto ModuleFile = FileMgr->getOptionalFileRef(ModuleFilename)) if (*ModuleFile == M->getASTFile()) return M; diff --git a/clang/lib/Frontend/Rewrite/FrontendActions.cpp b/clang/lib/Frontend/Rewrite/FrontendActions.cpp index cf5a9437e89e6..6e1f949f543a5 100644 --- a/clang/lib/Frontend/Rewrite/FrontendActions.cpp +++ b/clang/lib/Frontend/Rewrite/FrontendActions.cpp @@ -213,7 +213,7 @@ class RewriteIncludesAction::RewriteImportsListener : public ASTReaderListener { void visitModuleFile(StringRef Filename, serialization::ModuleKind Kind) override { - auto File = CI.getFileManager().getFile(Filename); + auto File = CI.getFileManager().getOptionalFileRef(Filename); assert(File && "missing file for loaded module?"); // Only rewrite each module file once. diff --git a/clang/lib/InstallAPI/Frontend.cpp b/clang/lib/InstallAPI/Frontend.cpp index 04d06f46d2652..2ebe72bf021cf 100644 --- a/clang/lib/InstallAPI/Frontend.cpp +++ b/clang/lib/InstallAPI/Frontend.cpp @@ -107,7 +107,7 @@ InstallAPIContext::findAndRecordFile(const FileEntry *FE, } void InstallAPIContext::addKnownHeader(const HeaderFile &H) { - auto FE = FM->getFile(H.getPath()); + auto FE = FM->getOptionalFileRef(H.getPath()); if (!FE) return; // File does not exist. KnownFiles[*FE] = H.getType(); diff --git a/clang/lib/Lex/HeaderSearch.cpp b/clang/lib/Lex/HeaderSearch.cpp index 4914c10e62d0c..8826ab449df49 100644 --- a/clang/lib/Lex/HeaderSearch.cpp +++ b/clang/lib/Lex/HeaderSearch.cpp @@ -227,7 +227,7 @@ std::string HeaderSearch::getPrebuiltModuleFileName(StringRef ModuleName, ".pcm"); else llvm::sys::path::append(Result, ModuleName + ".pcm"); - if (getFileMgr().getFile(Result.str())) + if (getFileMgr().getOptionalFileRef(Result)) return std::string(Result); } @@ -246,7 +246,7 @@ std::string HeaderSearch::getPrebuiltImplicitModuleFileName(Module *Module) { llvm::sys::path::append(CachePath, ModuleCacheHash); std::string FileName = getCachedModuleFileNameImpl(ModuleName, ModuleMapPath, CachePath); - if (!FileName.empty() && getFileMgr().getFile(FileName)) + if (!FileName.empty() && getFileMgr().getOptionalFileRef(FileName)) return FileName; } return {}; @@ -655,7 +655,7 @@ OptionalFileEntryRef DirectoryLookup::DoFrameworkLookup( ++NumFrameworkLookups; // If the framework dir doesn't exist, we fail. 
- auto Dir = FileMgr.getDirectory(FrameworkName); + auto Dir = FileMgr.getOptionalDirectoryRef(FrameworkName); if (!Dir) return std::nullopt; @@ -718,7 +718,7 @@ OptionalFileEntryRef DirectoryLookup::DoFrameworkLookup( bool FoundFramework = false; do { // Determine whether this directory exists. - auto Dir = FileMgr.getDirectory(FrameworkPath); + auto Dir = FileMgr.getOptionalDirectoryRef(FrameworkPath); if (!Dir) break; diff --git a/clang/lib/Lex/ModuleMap.cpp b/clang/lib/Lex/ModuleMap.cpp index eed7eca2e7356..2aada51c71c50 100644 --- a/clang/lib/Lex/ModuleMap.cpp +++ b/clang/lib/Lex/ModuleMap.cpp @@ -1144,7 +1144,8 @@ Module *ModuleMap::inferFrameworkModule(DirectoryEntryRef FrameworkDir, if (SubframeworkDirName.empty()) break; - if (auto SubDir = FileMgr.getDirectory(SubframeworkDirName)) { + if (auto SubDir = + FileMgr.getOptionalDirectoryRef(SubframeworkDirName)) { if (*SubDir == FrameworkDir) { FoundParent = true; break; diff --git a/clang/lib/Lex/PPLexerChange.cpp b/clang/lib/Lex/PPLexerChange.cpp index 8221db46e06ac..1a71f03b18236 100644 --- a/clang/lib/Lex/PPLexerChange.cpp +++ b/clang/lib/Lex/PPLexerChange.cpp @@ -229,7 +229,7 @@ static void computeRelativePath(FileManager &FM, const DirectoryEntry *Dir, StringRef FilePath = File.getDir().getName(); StringRef Path = FilePath; while (!Path.empty()) { - if (auto CurDir = FM.getDirectory(Path)) { + if (auto CurDir = FM.getOptionalDirectoryRef(Path)) { if (*CurDir == Dir) { Result = FilePath.substr(Path.size()); llvm::sys::path::append(Result, diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp index a369ad0be4795..1f7946e61d175 100644 --- a/clang/lib/Serialization/ASTReader.cpp +++ b/clang/lib/Serialization/ASTReader.cpp @@ -2044,14 +2044,14 @@ ASTReader::getGlobalPreprocessedEntityID(ModuleFile &M, const FileEntry *HeaderFileInfoTrait::getFile(const internal_key_type &Key) { FileManager &FileMgr = Reader.getFileManager(); if (!Key.Imported) { - if (auto File = FileMgr.getFile(Key.Filename)) + if (auto File = FileMgr.getOptionalFileRef(Key.Filename)) return *File; return nullptr; } std::string Resolved = std::string(Key.Filename); Reader.ResolveImportedPath(M, Resolved); - if (auto File = FileMgr.getFile(Resolved)) + if (auto File = FileMgr.getOptionalFileRef(Resolved)) return *File; return nullptr; } @@ -4217,7 +4217,7 @@ ASTReader::ReadModuleMapFileBlock(RecordData &Record, ModuleFile &F, assert(M && M->Name == F.ModuleName && "found module with different name"); // Check the primary module map file. 
- auto StoredModMap = FileMgr.getFile(F.ModuleMapPath); + auto StoredModMap = FileMgr.getOptionalFileRef(F.ModuleMapPath); if (!StoredModMap || *StoredModMap != ModMap) { assert(ModMap && "found module is missing module map file"); assert((ImportedBy || F.Kind == MK_ImplicitModule) && diff --git a/clang/lib/Serialization/ModuleManager.cpp b/clang/lib/Serialization/ModuleManager.cpp index 51b6429412960..e74a16b636802 100644 --- a/clang/lib/Serialization/ModuleManager.cpp +++ b/clang/lib/Serialization/ModuleManager.cpp @@ -42,8 +42,8 @@ using namespace clang; using namespace serialization; ModuleFile *ModuleManager::lookupByFileName(StringRef Name) const { - auto Entry = FileMgr.getFile(Name, /*OpenFile=*/false, - /*CacheFailure=*/false); + auto Entry = FileMgr.getOptionalFileRef(Name, /*OpenFile=*/false, + /*CacheFailure=*/false); if (Entry) return lookup(*Entry); @@ -64,8 +64,8 @@ ModuleFile *ModuleManager::lookup(const FileEntry *File) const { std::unique_ptr ModuleManager::lookupBuffer(StringRef Name) { - auto Entry = FileMgr.getFile(Name, /*OpenFile=*/false, - /*CacheFailure=*/false); + auto Entry = FileMgr.getOptionalFileRef(Name, /*OpenFile=*/false, + /*CacheFailure=*/false); if (!Entry) return nullptr; return std::move(InMemoryBuffers[*Entry]); @@ -279,8 +279,8 @@ void ModuleManager::removeModules(ModuleIterator First) { void ModuleManager::addInMemoryBuffer(StringRef FileName, std::unique_ptr Buffer) { - const FileEntry *Entry = - FileMgr.getVirtualFile(FileName, Buffer->getBufferSize(), 0); + FileEntryRef Entry = + FileMgr.getVirtualFileRef(FileName, Buffer->getBufferSize(), 0); InMemoryBuffers[Entry] = std::move(Buffer); } diff --git a/clang/lib/Tooling/Core/Replacement.cpp b/clang/lib/Tooling/Core/Replacement.cpp index 89a5b15244274..92e9859ca206e 100644 --- a/clang/lib/Tooling/Core/Replacement.cpp +++ b/clang/lib/Tooling/Core/Replacement.cpp @@ -614,7 +614,7 @@ std::map groupReplacementsByFile( std::map Result; llvm::SmallPtrSet ProcessedFileEntries; for (const auto &Entry : FileToReplaces) { - auto FE = FileMgr.getFile(Entry.first); + auto FE = FileMgr.getOptionalFileRef(Entry.first); if (!FE) llvm::errs() << "File path " << Entry.first << " is invalid.\n"; else if (ProcessedFileEntries.insert(*FE).second) diff --git a/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp b/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp index c775adc0ddd73..677f426590ab9 100644 --- a/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp +++ b/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp @@ -241,7 +241,7 @@ ModuleDepCollector::getInvocationAdjustedForModuleBuildWithoutOutputs( ModuleMapInputKind); auto CurrentModuleMapEntry = - ScanInstance.getFileManager().getFile(Deps.ClangModuleMapFile); + ScanInstance.getFileManager().getOptionalFileRef(Deps.ClangModuleMapFile); assert(CurrentModuleMapEntry && "module map file entry not found"); // Remove directly passed modulemap files. They will get added back if they @@ -251,7 +251,8 @@ ModuleDepCollector::getInvocationAdjustedForModuleBuildWithoutOutputs( auto DepModuleMapFiles = collectModuleMapFiles(Deps.ClangModuleDeps); for (StringRef ModuleMapFile : Deps.ModuleMapFileDeps) { // TODO: Track these as `FileEntryRef` to simplify the equality check below. 
- auto ModuleMapEntry = ScanInstance.getFileManager().getFile(ModuleMapFile); + auto ModuleMapEntry = + ScanInstance.getFileManager().getOptionalFileRef(ModuleMapFile); assert(ModuleMapEntry && "module map file entry not found"); // Don't report module maps describing eagerly-loaded dependency. This @@ -299,7 +300,8 @@ llvm::DenseSet ModuleDepCollector::collectModuleMapFiles( ModuleDeps *MD = ModuleDepsByID.lookup(MID); assert(MD && "Inconsistent dependency info"); // TODO: Track ClangModuleMapFile as `FileEntryRef`. - auto FE = ScanInstance.getFileManager().getFile(MD->ClangModuleMapFile); + auto FE = ScanInstance.getFileManager().getOptionalFileRef( + MD->ClangModuleMapFile); assert(FE && "Missing module map file that was previously found"); ModuleMapFiles.insert(*FE); } diff --git a/clang/tools/clang-installapi/Options.cpp b/clang/tools/clang-installapi/Options.cpp index 1ca1d583d5ccd..3fa79636de5d7 100644 --- a/clang/tools/clang-installapi/Options.cpp +++ b/clang/tools/clang-installapi/Options.cpp @@ -554,7 +554,7 @@ bool Options::processFrontendOptions(InputArgList &Args) { bool Options::addFilePaths(InputArgList &Args, PathSeq &Headers, OptSpecifier ID) { for (const StringRef Path : Args.getAllArgValues(ID)) { - if ((bool)FM->getDirectory(Path, /*CacheFailure=*/false)) { + if ((bool)FM->getOptionalDirectoryRef(Path, /*CacheFailure=*/false)) { auto InputHeadersOrErr = enumerateFiles(*FM, Path); if (!InputHeadersOrErr) { Diags->Report(diag::err_cannot_open_file) diff --git a/clang/tools/clang-refactor/ClangRefactor.cpp b/clang/tools/clang-refactor/ClangRefactor.cpp index 9310263c446ae..968f0594085d4 100644 --- a/clang/tools/clang-refactor/ClangRefactor.cpp +++ b/clang/tools/clang-refactor/ClangRefactor.cpp @@ -117,7 +117,7 @@ class SourceRangeSelectionArgument final : public SourceSelectionArgument { bool forAllRanges(const SourceManager &SM, llvm::function_ref Callback) override { - auto FE = SM.getFileManager().getFile(Range.FileName); + auto FE = SM.getFileManager().getOptionalFileRef(Range.FileName); FileID FID = FE ? SM.translateFile(*FE) : FileID(); if (!FE || FID.isInvalid()) { llvm::errs() << "error: -selection=" << Range.FileName diff --git a/clang/tools/clang-refactor/TestSupport.cpp b/clang/tools/clang-refactor/TestSupport.cpp index 3fae18c2109a6..8b6e250b3632d 100644 --- a/clang/tools/clang-refactor/TestSupport.cpp +++ b/clang/tools/clang-refactor/TestSupport.cpp @@ -43,7 +43,7 @@ void TestSelectionRangesInFile::dump(raw_ostream &OS) const { bool TestSelectionRangesInFile::foreachRange( const SourceManager &SM, llvm::function_ref Callback) const { - auto FE = SM.getFileManager().getFile(Filename); + auto FE = SM.getFileManager().getOptionalFileRef(Filename); FileID FID = FE ? SM.translateFile(*FE) : FileID(); if (!FE || FID.isInvalid()) { llvm::errs() << "error: -selection=test:" << Filename diff --git a/clang/unittests/Basic/FileManagerTest.cpp b/clang/unittests/Basic/FileManagerTest.cpp index d32036d975ce9..53897322f6160 100644 --- a/clang/unittests/Basic/FileManagerTest.cpp +++ b/clang/unittests/Basic/FileManagerTest.cpp @@ -116,9 +116,9 @@ TEST_F(FileManagerTest, NoVirtualDirectoryExistsBeforeAVirtualFileIsAdded) { // by what's in the real file system. 
manager.setStatCache(std::make_unique()); - ASSERT_FALSE(manager.getDirectory("virtual/dir/foo")); - ASSERT_FALSE(manager.getDirectory("virtual/dir")); - ASSERT_FALSE(manager.getDirectory("virtual")); + ASSERT_FALSE(manager.getOptionalDirectoryRef("virtual/dir/foo")); + ASSERT_FALSE(manager.getOptionalDirectoryRef("virtual/dir")); + ASSERT_FALSE(manager.getOptionalDirectoryRef("virtual")); } // When a virtual file is added, all of its ancestors should be created. @@ -126,10 +126,12 @@ TEST_F(FileManagerTest, getVirtualFileCreatesDirectoryEntriesForAncestors) { // Fake an empty real file system. manager.setStatCache(std::make_unique()); - manager.getVirtualFile("virtual/dir/bar.h", 100, 0); - ASSERT_FALSE(manager.getDirectory("virtual/dir/foo")); + manager.getVirtualFileRef("virtual/dir/bar.h", 100, 0); - auto dir = manager.getDirectoryRef("virtual/dir"); + auto dir = manager.getDirectoryRef("virtual/dir/foo"); + ASSERT_THAT_EXPECTED(dir, llvm::Failed()); + + dir = manager.getDirectoryRef("virtual/dir"); ASSERT_THAT_EXPECTED(dir, llvm::Succeeded()); EXPECT_EQ("virtual/dir", dir->getName()); @@ -172,7 +174,7 @@ TEST_F(FileManagerTest, getFileReturnsValidFileEntryForExistingVirtualFile) { // Fake an empty real file system. manager.setStatCache(std::make_unique()); - manager.getVirtualFile("virtual/dir/bar.h", 100, 0); + manager.getVirtualFileRef("virtual/dir/bar.h", 100, 0); auto file = manager.getFileRef("virtual/dir/bar.h"); ASSERT_THAT_EXPECTED(file, llvm::Succeeded()); EXPECT_EQ("virtual/dir/bar.h", file->getName()); @@ -190,11 +192,11 @@ TEST_F(FileManagerTest, getFileReturnsDifferentFileEntriesForDifferentFiles) { statCache->InjectFile("bar.cpp", 43); manager.setStatCache(std::move(statCache)); - auto fileFoo = manager.getFile("foo.cpp"); - auto fileBar = manager.getFile("bar.cpp"); + auto fileFoo = manager.getOptionalFileRef("foo.cpp"); + auto fileBar = manager.getOptionalFileRef("bar.cpp"); ASSERT_TRUE(fileFoo); ASSERT_TRUE(fileBar); - EXPECT_NE(*fileFoo, *fileBar); + EXPECT_NE(&fileFoo->getFileEntry(), &fileBar->getFileEntry()); } // getFile() returns an error if neither a real file nor a virtual file @@ -208,19 +210,22 @@ TEST_F(FileManagerTest, getFileReturnsErrorForNonexistentFile) { manager.setStatCache(std::move(statCache)); // Create a virtual bar.cpp file. - manager.getVirtualFile("bar.cpp", 200, 0); + manager.getVirtualFileRef("bar.cpp", 200, 0); - auto file = manager.getFile("xyz.txt"); + auto file = manager.getFileRef("xyz.txt"); ASSERT_FALSE(file); - ASSERT_EQ(file.getError(), std::errc::no_such_file_or_directory); + ASSERT_EQ(llvm::errorToErrorCode(file.takeError()), + std::make_error_code(std::errc::no_such_file_or_directory)); - auto readingDirAsFile = manager.getFile("MyDirectory"); + auto readingDirAsFile = manager.getFileRef("MyDirectory"); ASSERT_FALSE(readingDirAsFile); - ASSERT_EQ(readingDirAsFile.getError(), std::errc::is_a_directory); + ASSERT_EQ(llvm::errorToErrorCode(readingDirAsFile.takeError()), + std::make_error_code(std::errc::is_a_directory)); - auto readingFileAsDir = manager.getDirectory("foo.cpp"); + auto readingFileAsDir = manager.getDirectoryRef("foo.cpp"); ASSERT_FALSE(readingFileAsDir); - ASSERT_EQ(readingFileAsDir.getError(), std::errc::not_a_directory); + ASSERT_EQ(llvm::errorToErrorCode(readingFileAsDir.takeError()), + std::make_error_code(std::errc::not_a_directory)); } // The following tests apply to Unix-like system only. 
@@ -236,11 +241,11 @@ TEST_F(FileManagerTest, getFileReturnsSameFileEntryForAliasedRealFiles) { statCache->InjectFile("abc/bar.cpp", 42); manager.setStatCache(std::move(statCache)); - auto f1 = manager.getFile("abc/foo.cpp"); - auto f2 = manager.getFile("abc/bar.cpp"); + auto f1 = manager.getOptionalFileRef("abc/foo.cpp"); + auto f2 = manager.getOptionalFileRef("abc/bar.cpp"); - EXPECT_EQ(f1 ? *f1 : nullptr, - f2 ? *f2 : nullptr); + EXPECT_EQ(f1 ? &f1->getFileEntry() : nullptr, + f2 ? &f2->getFileEntry() : nullptr); // Check that getFileRef also does the right thing. auto r1 = manager.getFileRef("abc/foo.cpp"); @@ -338,11 +343,11 @@ TEST_F(FileManagerTest, getFileReturnsSameFileEntryForAliasedVirtualFiles) { statCache->InjectFile("abc/bar.cpp", 42); manager.setStatCache(std::move(statCache)); - auto f1 = manager.getFile("abc/foo.cpp"); - auto f2 = manager.getFile("abc/bar.cpp"); + auto f1 = manager.getOptionalFileRef("abc/foo.cpp"); + auto f2 = manager.getOptionalFileRef("abc/bar.cpp"); - EXPECT_EQ(f1 ? *f1 : nullptr, - f2 ? *f2 : nullptr); + EXPECT_EQ(f1 ? &f1->getFileEntry() : nullptr, + f2 ? &f2->getFileEntry() : nullptr); } TEST_F(FileManagerTest, getFileRefEquality) { @@ -420,20 +425,19 @@ TEST_F(FileManagerTest, getVirtualFileWithDifferentName) { manager.setStatCache(std::move(statCache)); // Inject the virtual file: - const FileEntry *file1 = manager.getVirtualFile("c:\\tmp\\test", 123, 1); - ASSERT_TRUE(file1 != nullptr); - EXPECT_EQ(43U, file1->getUniqueID().getFile()); - EXPECT_EQ(123, file1->getSize()); + FileEntryRef file1 = manager.getVirtualFileRef("c:\\tmp\\test", 123, 1); + EXPECT_EQ(43U, file1.getUniqueID().getFile()); + EXPECT_EQ(123, file1.getSize()); // Lookup the virtual file with a different name: - auto file2 = manager.getFile("c:/tmp/test", 100, 1); + auto file2 = manager.getOptionalFileRef("c:/tmp/test", 100, 1); ASSERT_TRUE(file2); // Check that it's the same UFE: EXPECT_EQ(file1, *file2); - EXPECT_EQ(43U, (*file2)->getUniqueID().getFile()); + EXPECT_EQ(43U, file2->getUniqueID().getFile()); // Check that the contents of the UFE are not overwritten by the entry in the // filesystem: - EXPECT_EQ(123, (*file2)->getSize()); + EXPECT_EQ(123, file2->getSize()); } #endif // !_WIN32 @@ -487,12 +491,11 @@ TEST_F(FileManagerTest, getVirtualFileFillsRealPathName) { Manager.setStatCache(std::move(statCache)); // Check for real path. - const FileEntry *file = Manager.getVirtualFile("/tmp/test", 123, 1); - ASSERT_TRUE(file != nullptr); + FileEntryRef file = Manager.getVirtualFileRef("/tmp/test", 123, 1); SmallString<64> ExpectedResult = CustomWorkingDir; llvm::sys::path::append(ExpectedResult, "tmp", "test"); - EXPECT_EQ(file->tryGetRealPathName(), ExpectedResult); + EXPECT_EQ(file.getFileEntry().tryGetRealPathName(), ExpectedResult); } TEST_F(FileManagerTest, getFileDontOpenRealPath) { @@ -514,12 +517,12 @@ TEST_F(FileManagerTest, getFileDontOpenRealPath) { Manager.setStatCache(std::move(statCache)); // Check for real path. 
- auto file = Manager.getFile("/tmp/test", /*OpenFile=*/false); + auto file = Manager.getOptionalFileRef("/tmp/test", /*OpenFile=*/false); ASSERT_TRUE(file); SmallString<64> ExpectedResult = CustomWorkingDir; llvm::sys::path::append(ExpectedResult, "tmp", "test"); - EXPECT_EQ((*file)->tryGetRealPathName(), ExpectedResult); + EXPECT_EQ(file->getFileEntry().tryGetRealPathName(), ExpectedResult); } TEST_F(FileManagerTest, getBypassFile) { diff --git a/clang/unittests/Basic/SourceManagerTest.cpp b/clang/unittests/Basic/SourceManagerTest.cpp index 0f2476bd8b061..2b3fce9128ba9 100644 --- a/clang/unittests/Basic/SourceManagerTest.cpp +++ b/clang/unittests/Basic/SourceManagerTest.cpp @@ -549,7 +549,7 @@ TEST_F(SourceManagerTest, getMacroArgExpandedLocation) { // These are different than normal includes since predefines buffer doesn't // have a valid insertion location. PP.setPredefines("#include \"/implicit-header.h\""); - FileMgr.getVirtualFile("/implicit-header.h", 0, 0); + FileMgr.getVirtualFileRef("/implicit-header.h", 0, 0); PP.Initialize(*Target); PP.EnterMainSourceFile(); diff --git a/clang/unittests/Frontend/CompilerInstanceTest.cpp b/clang/unittests/Frontend/CompilerInstanceTest.cpp index 8bc705dd21993..5cf548e913cc1 100644 --- a/clang/unittests/Frontend/CompilerInstanceTest.cpp +++ b/clang/unittests/Frontend/CompilerInstanceTest.cpp @@ -71,7 +71,7 @@ TEST(CompilerInstance, DefaultVFSOverlayFromInvocation) { // Check if the virtual file exists which means that our VFS is used by the // CompilerInstance. - ASSERT_TRUE(Instance.getFileManager().getFile("vfs-virtual.file")); + ASSERT_TRUE(Instance.getFileManager().getOptionalFileRef("vfs-virtual.file")); } TEST(CompilerInstance, AllowDiagnosticLogWithUnownedDiagnosticConsumer) { From 394f59c203c715d00be4643c20bbe22893397adf Mon Sep 17 00:00:00 2001 From: Lewis Crawford Date: Wed, 25 Sep 2024 18:39:03 +0100 Subject: [PATCH 202/212] [NVPTX] Add Read/Write/SideEffect attributes to atomic instructions (#109665) Set the mayLoad, mayStore, and hasSideEffects hints for NVPTX atomic instructions. This prevents any optimizations (e.g. rematerialization) from illegally duplicating them and generating broken code. 
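For illustration (not part of the patch): an atomic read-modify-write both
loads and stores memory, so re-executing it is observable. Below is a minimal
LLVM IR sketch of the hazard, with illustrative names. If the lowered NVPTX
atomic carried none of these hints, a pass could legally "recompute" %old by
emitting the instruction a second time, incrementing the counter twice:

  ; The atomicrmw must execute exactly once: it returns the old value and
  ; also updates the counter in global memory (addrspace(1) on NVPTX).
  define i32 @bump_counter(ptr addrspace(1) %counter) {
    %old = atomicrmw add ptr addrspace(1) %counter, i32 1 seq_cst
    ret i32 %old
  }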
--- llvm/lib/Target/NVPTX/NVPTXIntrinsics.td | 86 +++++++++++++----------- 1 file changed, 46 insertions(+), 40 deletions(-) diff --git a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td index 56c551661151d..f22f0b368c9d5 100644 --- a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td +++ b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td @@ -1619,14 +1619,16 @@ multiclass F_ATOMIC_2_imp Pred> { - def reg : NVPTXInst<(outs regclass:$dst), (ins ptrclass:$addr, regclass:$b), - !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b;"), - [(set (regT regclass:$dst), (IntOp (ptrT ptrclass:$addr), (regT regclass:$b)))]>, - Requires; - def imm : NVPTXInst<(outs regclass:$dst), (ins ptrclass:$addr, IMMType:$b), - !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b;", ""), - [(set (regT regclass:$dst), (IntOp (ptrT ptrclass:$addr), IMM:$b))]>, - Requires], Pred)>; + let mayLoad = 1, mayStore = 1, hasSideEffects = 1 in { + def reg : NVPTXInst<(outs regclass:$dst), (ins ptrclass:$addr, regclass:$b), + !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b;"), + [(set (regT regclass:$dst), (IntOp (ptrT ptrclass:$addr), (regT regclass:$b)))]>, + Requires; + def imm : NVPTXInst<(outs regclass:$dst), (ins ptrclass:$addr, IMMType:$b), + !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b;", ""), + [(set (regT regclass:$dst), (IntOp (ptrT ptrclass:$addr), IMM:$b))]>, + Requires], Pred)>; + } } multiclass F_ATOMIC_2 Pred> { - def reg : NVPTXInst<(outs regclass:$dst), (ins ptrclass:$addr, regclass:$b), - !strconcat( - "{{ \n\t", - ".reg \t.s", TypeStr, " temp; \n\t", - "neg.s", TypeStr, " \ttemp, $b; \n\t", - "atom", SpaceStr, OpcStr, ".u", TypeStr, " \t$dst, [$addr], temp; \n\t", - "}}"), - [(set (regT regclass:$dst), (IntOp (ptrT ptrclass:$addr), (regT regclass:$b)))]>, - Requires; + let mayLoad = 1, mayStore = 1, hasSideEffects = 1 in { + def reg : NVPTXInst<(outs regclass:$dst), (ins ptrclass:$addr, regclass:$b), + !strconcat( + "{{ \n\t", + ".reg \t.s", TypeStr, " temp; \n\t", + "neg.s", TypeStr, " \ttemp, $b; \n\t", + "atom", SpaceStr, OpcStr, ".u", TypeStr, " \t$dst, [$addr], temp; \n\t", + "}}"), + [(set (regT regclass:$dst), (IntOp (ptrT ptrclass:$addr), (regT regclass:$b)))]>, + Requires; + } } multiclass F_ATOMIC_2_NEG Pred = []> { @@ -1665,29 +1669,31 @@ multiclass F_ATOMIC_3_imp Pred> { - def reg : NVPTXInst<(outs regclass:$dst), - (ins ptrclass:$addr, regclass:$b, regclass:$c), - !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b, $c;"), - [(set (regT regclass:$dst), (IntOp (ptrT ptrclass:$addr), (regT regclass:$b), (regT regclass:$c)))]>, - Requires; - - def imm1 : NVPTXInst<(outs regclass:$dst), - (ins ptrclass:$addr, IMMType:$b, regclass:$c), - !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b, $c;"), - [(set (regT regclass:$dst), (IntOp (ptrT ptrclass:$addr), imm:$b, (regT regclass:$c)))]>, - Requires; - - def imm2 : NVPTXInst<(outs regclass:$dst), - (ins ptrclass:$addr, regclass:$b, IMMType:$c), - !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b, $c;", ""), - [(set (regT regclass:$dst), (IntOp (ptrT ptrclass:$addr), (regT regclass:$b), imm:$c))]>, - Requires; - - def imm3 : NVPTXInst<(outs regclass:$dst), - (ins ptrclass:$addr, IMMType:$b, IMMType:$c), - !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b, $c;"), - [(set (regT regclass:$dst), (IntOp (ptrT ptrclass:$addr), imm:$b, imm:$c))]>, - Requires; + let mayLoad = 1, mayStore = 1, 
hasSideEffects = 1 in { + def reg : NVPTXInst<(outs regclass:$dst), + (ins ptrclass:$addr, regclass:$b, regclass:$c), + !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b, $c;"), + [(set (regT regclass:$dst), (IntOp (ptrT ptrclass:$addr), (regT regclass:$b), (regT regclass:$c)))]>, + Requires; + + def imm1 : NVPTXInst<(outs regclass:$dst), + (ins ptrclass:$addr, IMMType:$b, regclass:$c), + !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b, $c;"), + [(set (regT regclass:$dst), (IntOp (ptrT ptrclass:$addr), imm:$b, (regT regclass:$c)))]>, + Requires; + + def imm2 : NVPTXInst<(outs regclass:$dst), + (ins ptrclass:$addr, regclass:$b, IMMType:$c), + !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b, $c;", ""), + [(set (regT regclass:$dst), (IntOp (ptrT ptrclass:$addr), (regT regclass:$b), imm:$c))]>, + Requires; + + def imm3 : NVPTXInst<(outs regclass:$dst), + (ins ptrclass:$addr, IMMType:$b, IMMType:$c), + !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b, $c;"), + [(set (regT regclass:$dst), (IntOp (ptrT ptrclass:$addr), imm:$b, imm:$c))]>, + Requires; + } } multiclass F_ATOMIC_3 Pred = []> { From eba21accf221c16875f1318952a0b1f468913a30 Mon Sep 17 00:00:00 2001 From: vporpo Date: Wed, 25 Sep 2024 10:43:36 -0700 Subject: [PATCH 203/212] [SandboxIR][Utils] Implement getMemoryLocation() (#109724) This patch implements sandboxir::Utils::memoryLocationGetOrNone() that calls MemoryLocation::getOrNone() internally. Ideally this would require a sandboxir::MemoryLocation, but this should be good enough for now. --- llvm/include/llvm/SandboxIR/SandboxIR.h | 1 + llvm/include/llvm/SandboxIR/Utils.h | 9 ++++ llvm/lib/SandboxIR/CMakeLists.txt | 1 + llvm/unittests/SandboxIR/CMakeLists.txt | 2 + llvm/unittests/SandboxIR/UtilsTest.cpp | 56 +++++++++++++++++++++++++ 5 files changed, 69 insertions(+) create mode 100644 llvm/unittests/SandboxIR/UtilsTest.cpp diff --git a/llvm/include/llvm/SandboxIR/SandboxIR.h b/llvm/include/llvm/SandboxIR/SandboxIR.h index d4c907ce8327d..d99d564ba24e5 100644 --- a/llvm/include/llvm/SandboxIR/SandboxIR.h +++ b/llvm/include/llvm/SandboxIR/SandboxIR.h @@ -346,6 +346,7 @@ class Value { friend class NoCFIValue; // For `Val`. friend class ConstantPtrAuth; // For `Val`. friend class ConstantExpr; // For `Val`. + friend class Utils; // For `Val`. // Region needs to manipulate metadata in the underlying LLVM Value, we don't // expose metadata in sandboxir. diff --git a/llvm/include/llvm/SandboxIR/Utils.h b/llvm/include/llvm/SandboxIR/Utils.h index ccc0030868a55..4e8a175f54705 100644 --- a/llvm/include/llvm/SandboxIR/Utils.h +++ b/llvm/include/llvm/SandboxIR/Utils.h @@ -12,6 +12,9 @@ #ifndef LLVM_SANDBOXIR_UTILS_H #define LLVM_SANDBOXIR_UTILS_H +#include "llvm/Analysis/MemoryLocation.h" +#include "llvm/SandboxIR/SandboxIR.h" + namespace llvm::sandboxir { class Utils { @@ -48,6 +51,12 @@ class Utils { Type *Ty = getExpectedType(V); return DL.getTypeSizeInBits(Ty->LLVMTy); } + + /// Equivalent to MemoryLocation::getOrNone(I). 
+ static std::optional + memoryLocationGetOrNone(const Instruction *I) { + return llvm::MemoryLocation::getOrNone(cast(I->Val)); + } }; } // namespace llvm::sandboxir diff --git a/llvm/lib/SandboxIR/CMakeLists.txt b/llvm/lib/SandboxIR/CMakeLists.txt index 03474be0c7b80..b2e6f6285fea5 100644 --- a/llvm/lib/SandboxIR/CMakeLists.txt +++ b/llvm/lib/SandboxIR/CMakeLists.txt @@ -11,5 +11,6 @@ add_llvm_component_library(LLVMSandboxIR LINK_COMPONENTS Core Support + Analysis ) diff --git a/llvm/unittests/SandboxIR/CMakeLists.txt b/llvm/unittests/SandboxIR/CMakeLists.txt index a228637b062a4..2ab284a511fca 100644 --- a/llvm/unittests/SandboxIR/CMakeLists.txt +++ b/llvm/unittests/SandboxIR/CMakeLists.txt @@ -2,6 +2,7 @@ set(LLVM_LINK_COMPONENTS AsmParser SandboxIR Core + Analysis ) add_llvm_unittest(SandboxIRTests @@ -9,4 +10,5 @@ add_llvm_unittest(SandboxIRTests SandboxIRTest.cpp TrackerTest.cpp TypesTest.cpp + UtilsTest.cpp ) diff --git a/llvm/unittests/SandboxIR/UtilsTest.cpp b/llvm/unittests/SandboxIR/UtilsTest.cpp new file mode 100644 index 0000000000000..ded3edf1206a4 --- /dev/null +++ b/llvm/unittests/SandboxIR/UtilsTest.cpp @@ -0,0 +1,56 @@ +//===- UtilsTest.cpp ------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/SandboxIR/Utils.h" +#include "llvm/AsmParser/Parser.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/Module.h" +#include "llvm/SandboxIR/SandboxIR.h" +#include "llvm/Support/SourceMgr.h" +#include "gtest/gtest.h" + +using namespace llvm; + +struct UtilsTest : public testing::Test { + LLVMContext C; + std::unique_ptr M; + + void parseIR(LLVMContext &C, const char *IR) { + SMDiagnostic Err; + M = parseAssemblyString(IR, Err, C); + if (!M) + Err.print("UtilsTest", errs()); + } + BasicBlock *getBasicBlockByName(Function &F, StringRef Name) { + for (BasicBlock &BB : F) + if (BB.getName() == Name) + return &BB; + llvm_unreachable("Expected to find basic block!"); + } +}; + +TEST_F(UtilsTest, getMemoryLocation) { + parseIR(C, R"IR( +define void @foo(ptr %arg0) { + %ld = load i8, ptr %arg0 + ret void +} +)IR"); + llvm::Function *LLVMF = &*M->getFunction("foo"); + auto *LLVMBB = &*LLVMF->begin(); + auto *LLVMLd = cast(&*LLVMBB->begin()); + sandboxir::Context Ctx(C); + sandboxir::Function *F = Ctx.createFunction(LLVMF); + auto *BB = &*F->begin(); + auto *Ld = cast(&*BB->begin()); + EXPECT_EQ(sandboxir::Utils::memoryLocationGetOrNone(Ld), + MemoryLocation::getOrNone(LLVMLd)); +} From f172c31a578fa72375ce7a2199ecdfbbd764dc0e Mon Sep 17 00:00:00 2001 From: Luke Lau Date: Thu, 26 Sep 2024 01:47:46 +0800 Subject: [PATCH 204/212] [RISCV] Lower memory ops and VP splat for zvfhmin and zvfbfmin (#109387) We can lower f16/bf16 memory ops without promotion through the existing custom lowering. Some of the zero strided VP loads get combined to a VP splat, so we need to also handle the lowering for that for f16/bf16 w/ zvfhmin/zvfbfmin. This patch copies the lowering from ISD::SPLAT_VECTOR over to lowerScalarSplat which is used by the VP splat lowering. 
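As a hedged illustration (not taken from the patch): the zero-strided case the
combine targets. With stride 0 every active lane re-reads the same scalar, so
the load below is effectively a masked splat of the bfloat at %p; both the
strided form and the resulting vp.splat now lower directly under zvfbfmin
rather than going through element promotion. The function name is
illustrative:

  declare <vscale x 2 x bfloat> @llvm.experimental.vp.strided.load.nxv2bf16.p0.i32(ptr, i32, <vscale x 2 x i1>, i32)

  define <vscale x 2 x bfloat> @splat_like_load(ptr %p, i32 zeroext %evl) {
    ; Stride 0 makes every active lane read the same bfloat at %p.
    %v = call <vscale x 2 x bfloat> @llvm.experimental.vp.strided.load.nxv2bf16.p0.i32(ptr %p, i32 0, <vscale x 2 x i1> splat (i1 true), i32 %evl)
    ret <vscale x 2 x bfloat> %v
  }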
--- llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 33 ++- llvm/test/CodeGen/RISCV/rvv/masked-load-fp.ll | 72 ++++- .../test/CodeGen/RISCV/rvv/masked-store-fp.ll | 72 ++++- llvm/test/CodeGen/RISCV/rvv/mgather-sdnode.ll | 216 +++++++++++++- .../test/CodeGen/RISCV/rvv/mscatter-sdnode.ll | 194 ++++++++++++- llvm/test/CodeGen/RISCV/rvv/strided-vpload.ll | 162 ++++++++--- .../test/CodeGen/RISCV/rvv/strided-vpstore.ll | 88 +++++- llvm/test/CodeGen/RISCV/rvv/vp-splat.ll | 264 +++++++++++++++--- .../test/CodeGen/RISCV/rvv/vpgather-sdnode.ll | 229 +++++++++++++-- llvm/test/CodeGen/RISCV/rvv/vpload.ll | 82 +++++- .../CodeGen/RISCV/rvv/vpscatter-sdnode.ll | 219 +++++++++++++-- llvm/test/CodeGen/RISCV/rvv/vpstore.ll | 72 ++++- 12 files changed, 1548 insertions(+), 155 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 56c9ba67bb35e..3b61cb5dfe090 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -1082,10 +1082,17 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, VT, Custom); MVT EltVT = VT.getVectorElementType(); if (isTypeLegal(EltVT)) - setOperationAction(ISD::SPLAT_VECTOR, VT, Custom); + setOperationAction({ISD::SPLAT_VECTOR, ISD::EXPERIMENTAL_VP_SPLAT}, VT, + Custom); else - setOperationAction(ISD::SPLAT_VECTOR, EltVT, Custom); - setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom); + setOperationAction({ISD::SPLAT_VECTOR, ISD::EXPERIMENTAL_VP_SPLAT}, + EltVT, Custom); + setOperationAction({ISD::LOAD, ISD::STORE, ISD::MLOAD, ISD::MSTORE, + ISD::MGATHER, ISD::MSCATTER, ISD::VP_LOAD, + ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD, + ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER, + ISD::VP_SCATTER}, + VT, Custom); setOperationAction(ISD::FNEG, VT, Expand); setOperationAction(ISD::FABS, VT, Expand); @@ -4449,11 +4456,27 @@ static SDValue lowerScalarSplat(SDValue Passthru, SDValue Scalar, SDValue VL, bool HasPassthru = Passthru && !Passthru.isUndef(); if (!HasPassthru && !Passthru) Passthru = DAG.getUNDEF(VT); - if (VT.isFloatingPoint()) - return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, VT, Passthru, Scalar, VL); + MVT EltVT = VT.getVectorElementType(); MVT XLenVT = Subtarget.getXLenVT(); + if (VT.isFloatingPoint()) { + if ((EltVT == MVT::f16 && !Subtarget.hasStdExtZvfh()) || + EltVT == MVT::bf16) { + if ((EltVT == MVT::bf16 && Subtarget.hasStdExtZfbfmin()) || + (EltVT == MVT::f16 && Subtarget.hasStdExtZfhmin())) + Scalar = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Scalar); + else + Scalar = DAG.getNode(ISD::BITCAST, DL, MVT::i16, Scalar); + MVT IVT = VT.changeVectorElementType(MVT::i16); + Passthru = DAG.getNode(ISD::BITCAST, DL, IVT, Passthru); + SDValue Splat = + lowerScalarSplat(Passthru, Scalar, VL, IVT, DL, DAG, Subtarget); + return DAG.getNode(ISD::BITCAST, DL, VT, Splat); + } + return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, VT, Passthru, Scalar, VL); + } + // Simplest case is that the operand needs to be promoted to XLenVT. 
if (Scalar.getValueType().bitsLE(XLenVT)) { // If the operand is a constant, sign extend to increase our chances diff --git a/llvm/test/CodeGen/RISCV/rvv/masked-load-fp.ll b/llvm/test/CodeGen/RISCV/rvv/masked-load-fp.ll index df1bd889c1042..9c7ad239bcade 100644 --- a/llvm/test/CodeGen/RISCV/rvv/masked-load-fp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/masked-load-fp.ll @@ -1,6 +1,19 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+zvfbfmin,+v -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+zvfbfmin,+v -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfhmin,+zvfbfmin,+v -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfhmin,+zvfbfmin,+v -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s + +define @masked_load_nxv1bf16(ptr %a, %mask) nounwind { +; CHECK-LABEL: masked_load_nxv1bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma +; CHECK-NEXT: vle16.v v8, (a0), v0.t +; CHECK-NEXT: ret + %load = call @llvm.masked.load.nxv1bf16(ptr %a, i32 2, %mask, undef) + ret %load +} +declare @llvm.masked.load.nxv1bf16(ptr, i32, , ) define @masked_load_nxv1f16(ptr %a, %mask) nounwind { ; CHECK-LABEL: masked_load_nxv1f16: @@ -35,6 +48,17 @@ define @masked_load_nxv1f64(ptr %a, %mas } declare @llvm.masked.load.nxv1f64(ptr, i32, , ) +define @masked_load_nxv2bf16(ptr %a, %mask) nounwind { +; CHECK-LABEL: masked_load_nxv2bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma +; CHECK-NEXT: vle16.v v8, (a0), v0.t +; CHECK-NEXT: ret + %load = call @llvm.masked.load.nxv2bf16(ptr %a, i32 2, %mask, undef) + ret %load +} +declare @llvm.masked.load.nxv2bf16(ptr, i32, , ) + define @masked_load_nxv2f16(ptr %a, %mask) nounwind { ; CHECK-LABEL: masked_load_nxv2f16: ; CHECK: # %bb.0: @@ -68,6 +92,17 @@ define @masked_load_nxv2f64(ptr %a, %mas } declare @llvm.masked.load.nxv2f64(ptr, i32, , ) +define @masked_load_nxv4bf16(ptr %a, %mask) nounwind { +; CHECK-LABEL: masked_load_nxv4bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; CHECK-NEXT: vle16.v v8, (a0), v0.t +; CHECK-NEXT: ret + %load = call @llvm.masked.load.nxv4bf16(ptr %a, i32 2, %mask, undef) + ret %load +} +declare @llvm.masked.load.nxv4bf16(ptr, i32, , ) + define @masked_load_nxv4f16(ptr %a, %mask) nounwind { ; CHECK-LABEL: masked_load_nxv4f16: ; CHECK: # %bb.0: @@ -101,6 +136,17 @@ define @masked_load_nxv4f64(ptr %a, %mas } declare @llvm.masked.load.nxv4f64(ptr, i32, , ) +define @masked_load_nxv8bf16(ptr %a, %mask) nounwind { +; CHECK-LABEL: masked_load_nxv8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; CHECK-NEXT: vle16.v v8, (a0), v0.t +; CHECK-NEXT: ret + %load = call @llvm.masked.load.nxv8bf16(ptr %a, i32 2, %mask, undef) + ret %load +} +declare @llvm.masked.load.nxv8bf16(ptr, i32, , ) + define @masked_load_nxv8f16(ptr %a, %mask) nounwind { ; CHECK-LABEL: masked_load_nxv8f16: ; CHECK: # %bb.0: @@ -134,6 +180,17 @@ define @masked_load_nxv8f64(ptr %a, %mas } declare @llvm.masked.load.nxv8f64(ptr, i32, , ) +define @masked_load_nxv16bf16(ptr %a, %mask) nounwind { +; CHECK-LABEL: 
masked_load_nxv16bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma +; CHECK-NEXT: vle16.v v8, (a0), v0.t +; CHECK-NEXT: ret + %load = call @llvm.masked.load.nxv16bf16(ptr %a, i32 2, %mask, undef) + ret %load +} +declare @llvm.masked.load.nxv16bf16(ptr, i32, , ) + define @masked_load_nxv16f16(ptr %a, %mask) nounwind { ; CHECK-LABEL: masked_load_nxv16f16: ; CHECK: # %bb.0: @@ -156,6 +213,17 @@ define @masked_load_nxv16f32(ptr %a, %m } declare @llvm.masked.load.nxv16f32(ptr, i32, , ) +define @masked_load_nxv32bf16(ptr %a, %mask) nounwind { +; CHECK-LABEL: masked_load_nxv32bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, m8, ta, ma +; CHECK-NEXT: vle16.v v8, (a0), v0.t +; CHECK-NEXT: ret + %load = call @llvm.masked.load.nxv32bf16(ptr %a, i32 2, %mask, undef) + ret %load +} +declare @llvm.masked.load.nxv32bf16(ptr, i32, , ) + define @masked_load_nxv32f16(ptr %a, %mask) nounwind { ; CHECK-LABEL: masked_load_nxv32f16: ; CHECK: # %bb.0: diff --git a/llvm/test/CodeGen/RISCV/rvv/masked-store-fp.ll b/llvm/test/CodeGen/RISCV/rvv/masked-store-fp.ll index 17193aef1dff9..ddb56e0d979a1 100644 --- a/llvm/test/CodeGen/RISCV/rvv/masked-store-fp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/masked-store-fp.ll @@ -1,6 +1,19 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+zvfbfmin,+v -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+zvfbfmin,+v -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfhmin,+zvfbfmin,+v -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfhmin,+zvfbfmin,+v -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s + +define void @masked_store_nxv1bf16( %val, ptr %a, %mask) nounwind { +; CHECK-LABEL: masked_store_nxv1bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma +; CHECK-NEXT: vse16.v v8, (a0), v0.t +; CHECK-NEXT: ret + call void @llvm.masked.store.nxv1bf16.p0( %val, ptr %a, i32 2, %mask) + ret void +} +declare void @llvm.masked.store.nxv1bf16.p0(, ptr, i32, ) define void @masked_store_nxv1f16( %val, ptr %a, %mask) nounwind { ; CHECK-LABEL: masked_store_nxv1f16: @@ -35,6 +48,17 @@ define void @masked_store_nxv1f64( %val, ptr %a, , ptr, i32, ) +define void @masked_store_nxv2bf16( %val, ptr %a, %mask) nounwind { +; CHECK-LABEL: masked_store_nxv2bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma +; CHECK-NEXT: vse16.v v8, (a0), v0.t +; CHECK-NEXT: ret + call void @llvm.masked.store.nxv2bf16.p0( %val, ptr %a, i32 2, %mask) + ret void +} +declare void @llvm.masked.store.nxv2bf16.p0(, ptr, i32, ) + define void @masked_store_nxv2f16( %val, ptr %a, %mask) nounwind { ; CHECK-LABEL: masked_store_nxv2f16: ; CHECK: # %bb.0: @@ -68,6 +92,17 @@ define void @masked_store_nxv2f64( %val, ptr %a, , ptr, i32, ) +define void @masked_store_nxv4bf16( %val, ptr %a, %mask) nounwind { +; CHECK-LABEL: masked_store_nxv4bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; CHECK-NEXT: vse16.v v8, (a0), v0.t +; CHECK-NEXT: ret + call void @llvm.masked.store.nxv4bf16.p0( %val, ptr %a, i32 2, %mask) + ret void +} +declare void 
@llvm.masked.store.nxv4bf16.p0(, ptr, i32, ) + define void @masked_store_nxv4f16( %val, ptr %a, %mask) nounwind { ; CHECK-LABEL: masked_store_nxv4f16: ; CHECK: # %bb.0: @@ -101,6 +136,17 @@ define void @masked_store_nxv4f64( %val, ptr %a, , ptr, i32, ) +define void @masked_store_nxv8bf16( %val, ptr %a, %mask) nounwind { +; CHECK-LABEL: masked_store_nxv8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; CHECK-NEXT: vse16.v v8, (a0), v0.t +; CHECK-NEXT: ret + call void @llvm.masked.store.nxv8bf16.p0( %val, ptr %a, i32 2, %mask) + ret void +} +declare void @llvm.masked.store.nxv8bf16.p0(, ptr, i32, ) + define void @masked_store_nxv8f16( %val, ptr %a, %mask) nounwind { ; CHECK-LABEL: masked_store_nxv8f16: ; CHECK: # %bb.0: @@ -134,6 +180,17 @@ define void @masked_store_nxv8f64( %val, ptr %a, , ptr, i32, ) +define void @masked_store_nxv16bf16( %val, ptr %a, %mask) nounwind { +; CHECK-LABEL: masked_store_nxv16bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma +; CHECK-NEXT: vse16.v v8, (a0), v0.t +; CHECK-NEXT: ret + call void @llvm.masked.store.nxv16bf16.p0( %val, ptr %a, i32 2, %mask) + ret void +} +declare void @llvm.masked.store.nxv16bf16.p0(, ptr, i32, ) + define void @masked_store_nxv16f16( %val, ptr %a, %mask) nounwind { ; CHECK-LABEL: masked_store_nxv16f16: ; CHECK: # %bb.0: @@ -156,6 +213,17 @@ define void @masked_store_nxv16f32( %val, ptr %a, , ptr, i32, ) +define void @masked_store_nxv32bf16( %val, ptr %a, %mask) nounwind { +; CHECK-LABEL: masked_store_nxv32bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, m8, ta, ma +; CHECK-NEXT: vse16.v v8, (a0), v0.t +; CHECK-NEXT: ret + call void @llvm.masked.store.nxv32bf16.p0( %val, ptr %a, i32 2, %mask) + ret void +} +declare void @llvm.masked.store.nxv32bf16.p0(, ptr, i32, ) + define void @masked_store_nxv32f16( %val, ptr %a, %mask) nounwind { ; CHECK-LABEL: masked_store_nxv32f16: ; CHECK: # %bb.0: diff --git a/llvm/test/CodeGen/RISCV/rvv/mgather-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/mgather-sdnode.ll index be37be06f0e77..189ba08dddc7a 100644 --- a/llvm/test/CodeGen/RISCV/rvv/mgather-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/mgather-sdnode.ll @@ -1,8 +1,16 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfh,+zvfh,+v -target-abi=ilp32d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32 -; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+zvfh,+v -target-abi=lp64d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64 +; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfh,+zvfh,+zvfbfmin,+v \ +; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \ +; RUN: --check-prefixes=CHECK,RV32 +; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+zvfh,+zvfbfmin,+v \ +; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \ +; RUN: --check-prefixes=CHECK,RV64 +; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfh,+zvfhmin,+zvfbfmin,+v \ +; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \ +; RUN: --check-prefixes=CHECK,RV32 +; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+zvfhmin,+zvfbfmin,+v \ +; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \ +; RUN: --check-prefixes=CHECK,RV64 declare @llvm.masked.gather.nxv1i8.nxv1p0(, i32, , ) @@ -1257,6 +1265,206 @@ define void @mgather_nxv16i64( %ptrs0, %ptr ret void } +declare @llvm.masked.gather.nxv1bf16.nxv1p0(, i32, , ) + +define @mgather_nxv1bf16( %ptrs, %m, %passthru) { +; 
RV32-LABEL: mgather_nxv1bf16: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a0, zero, e16, mf4, ta, mu +; RV32-NEXT: vluxei32.v v9, (zero), v8, v0.t +; RV32-NEXT: vmv1r.v v8, v9 +; RV32-NEXT: ret +; +; RV64-LABEL: mgather_nxv1bf16: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a0, zero, e16, mf4, ta, mu +; RV64-NEXT: vluxei64.v v9, (zero), v8, v0.t +; RV64-NEXT: vmv1r.v v8, v9 +; RV64-NEXT: ret + %v = call @llvm.masked.gather.nxv1bf16.nxv1p0( %ptrs, i32 2, %m, %passthru) + ret %v +} + +declare @llvm.masked.gather.nxv2bf16.nxv2p0(, i32, , ) + +define @mgather_nxv2bf16( %ptrs, %m, %passthru) { +; RV32-LABEL: mgather_nxv2bf16: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a0, zero, e16, mf2, ta, mu +; RV32-NEXT: vluxei32.v v9, (zero), v8, v0.t +; RV32-NEXT: vmv1r.v v8, v9 +; RV32-NEXT: ret +; +; RV64-LABEL: mgather_nxv2bf16: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a0, zero, e16, mf2, ta, mu +; RV64-NEXT: vluxei64.v v10, (zero), v8, v0.t +; RV64-NEXT: vmv1r.v v8, v10 +; RV64-NEXT: ret + %v = call @llvm.masked.gather.nxv2bf16.nxv2p0( %ptrs, i32 2, %m, %passthru) + ret %v +} + +declare @llvm.masked.gather.nxv4bf16.nxv4p0(, i32, , ) + +define @mgather_nxv4bf16( %ptrs, %m, %passthru) { +; RV32-LABEL: mgather_nxv4bf16: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a0, zero, e16, m1, ta, mu +; RV32-NEXT: vluxei32.v v10, (zero), v8, v0.t +; RV32-NEXT: vmv.v.v v8, v10 +; RV32-NEXT: ret +; +; RV64-LABEL: mgather_nxv4bf16: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a0, zero, e16, m1, ta, mu +; RV64-NEXT: vluxei64.v v12, (zero), v8, v0.t +; RV64-NEXT: vmv.v.v v8, v12 +; RV64-NEXT: ret + %v = call @llvm.masked.gather.nxv4bf16.nxv4p0( %ptrs, i32 2, %m, %passthru) + ret %v +} + +define @mgather_truemask_nxv4bf16( %ptrs, %passthru) { +; RV32-LABEL: mgather_truemask_nxv4bf16: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; RV32-NEXT: vluxei32.v v10, (zero), v8 +; RV32-NEXT: vmv.v.v v8, v10 +; RV32-NEXT: ret +; +; RV64-LABEL: mgather_truemask_nxv4bf16: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; RV64-NEXT: vluxei64.v v12, (zero), v8 +; RV64-NEXT: vmv.v.v v8, v12 +; RV64-NEXT: ret + %v = call @llvm.masked.gather.nxv4bf16.nxv4p0( %ptrs, i32 2, splat (i1 1), %passthru) + ret %v +} + +define @mgather_falsemask_nxv4bf16( %ptrs, %passthru) { +; RV32-LABEL: mgather_falsemask_nxv4bf16: +; RV32: # %bb.0: +; RV32-NEXT: vmv1r.v v8, v10 +; RV32-NEXT: ret +; +; RV64-LABEL: mgather_falsemask_nxv4bf16: +; RV64: # %bb.0: +; RV64-NEXT: vmv1r.v v8, v12 +; RV64-NEXT: ret + %v = call @llvm.masked.gather.nxv4bf16.nxv4p0( %ptrs, i32 2, zeroinitializer, %passthru) + ret %v +} + +declare @llvm.masked.gather.nxv8bf16.nxv8p0(, i32, , ) + +define @mgather_nxv8bf16( %ptrs, %m, %passthru) { +; RV32-LABEL: mgather_nxv8bf16: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a0, zero, e16, m2, ta, mu +; RV32-NEXT: vluxei32.v v12, (zero), v8, v0.t +; RV32-NEXT: vmv.v.v v8, v12 +; RV32-NEXT: ret +; +; RV64-LABEL: mgather_nxv8bf16: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a0, zero, e16, m2, ta, mu +; RV64-NEXT: vluxei64.v v16, (zero), v8, v0.t +; RV64-NEXT: vmv.v.v v8, v16 +; RV64-NEXT: ret + %v = call @llvm.masked.gather.nxv8bf16.nxv8p0( %ptrs, i32 2, %m, %passthru) + ret %v +} + +define @mgather_baseidx_nxv8i8_nxv8bf16(ptr %base, %idxs, %m, %passthru) { +; RV32-LABEL: mgather_baseidx_nxv8i8_nxv8bf16: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, ma +; RV32-NEXT: vsext.vf4 v12, v8 +; RV32-NEXT: vadd.vv v12, v12, v12 +; RV32-NEXT: vsetvli zero, zero, e16, m2, ta, mu +; RV32-NEXT: vluxei32.v v10, (a0), v12, v0.t +; 
RV32-NEXT: vmv.v.v v8, v10 +; RV32-NEXT: ret +; +; RV64-LABEL: mgather_baseidx_nxv8i8_nxv8bf16: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma +; RV64-NEXT: vsext.vf8 v16, v8 +; RV64-NEXT: vadd.vv v16, v16, v16 +; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, mu +; RV64-NEXT: vluxei64.v v10, (a0), v16, v0.t +; RV64-NEXT: vmv.v.v v8, v10 +; RV64-NEXT: ret + %ptrs = getelementptr inbounds bfloat, ptr %base, %idxs + %v = call @llvm.masked.gather.nxv8bf16.nxv8p0( %ptrs, i32 2, %m, %passthru) + ret %v +} + +define @mgather_baseidx_sext_nxv8i8_nxv8bf16(ptr %base, %idxs, %m, %passthru) { +; RV32-LABEL: mgather_baseidx_sext_nxv8i8_nxv8bf16: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, ma +; RV32-NEXT: vsext.vf4 v12, v8 +; RV32-NEXT: vadd.vv v12, v12, v12 +; RV32-NEXT: vsetvli zero, zero, e16, m2, ta, mu +; RV32-NEXT: vluxei32.v v10, (a0), v12, v0.t +; RV32-NEXT: vmv.v.v v8, v10 +; RV32-NEXT: ret +; +; RV64-LABEL: mgather_baseidx_sext_nxv8i8_nxv8bf16: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma +; RV64-NEXT: vsext.vf8 v16, v8 +; RV64-NEXT: vadd.vv v16, v16, v16 +; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, mu +; RV64-NEXT: vluxei64.v v10, (a0), v16, v0.t +; RV64-NEXT: vmv.v.v v8, v10 +; RV64-NEXT: ret + %eidxs = sext %idxs to + %ptrs = getelementptr inbounds bfloat, ptr %base, %eidxs + %v = call @llvm.masked.gather.nxv8bf16.nxv8p0( %ptrs, i32 2, %m, %passthru) + ret %v +} + +define @mgather_baseidx_zext_nxv8i8_nxv8bf16(ptr %base, %idxs, %m, %passthru) { +; CHECK-LABEL: mgather_baseidx_zext_nxv8i8_nxv8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma +; CHECK-NEXT: vwaddu.vv v12, v8, v8 +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu +; CHECK-NEXT: vluxei16.v v10, (a0), v12, v0.t +; CHECK-NEXT: vmv.v.v v8, v10 +; CHECK-NEXT: ret + %eidxs = zext %idxs to + %ptrs = getelementptr inbounds bfloat, ptr %base, %eidxs + %v = call @llvm.masked.gather.nxv8bf16.nxv8p0( %ptrs, i32 2, %m, %passthru) + ret %v +} + +define @mgather_baseidx_nxv8bf16(ptr %base, %idxs, %m, %passthru) { +; RV32-LABEL: mgather_baseidx_nxv8bf16: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a1, zero, e16, m2, ta, mu +; RV32-NEXT: vwadd.vv v12, v8, v8 +; RV32-NEXT: vluxei32.v v10, (a0), v12, v0.t +; RV32-NEXT: vmv.v.v v8, v10 +; RV32-NEXT: ret +; +; RV64-LABEL: mgather_baseidx_nxv8bf16: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma +; RV64-NEXT: vsext.vf4 v16, v8 +; RV64-NEXT: vadd.vv v16, v16, v16 +; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, mu +; RV64-NEXT: vluxei64.v v10, (a0), v16, v0.t +; RV64-NEXT: vmv.v.v v8, v10 +; RV64-NEXT: ret + %ptrs = getelementptr inbounds bfloat, ptr %base, %idxs + %v = call @llvm.masked.gather.nxv8bf16.nxv8p0( %ptrs, i32 2, %m, %passthru) + ret %v +} declare @llvm.masked.gather.nxv1f16.nxv1p0(, i32, , ) diff --git a/llvm/test/CodeGen/RISCV/rvv/mscatter-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/mscatter-sdnode.ll index 9bfa0f31dc3a6..29db67b4b0a41 100644 --- a/llvm/test/CodeGen/RISCV/rvv/mscatter-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/mscatter-sdnode.ll @@ -1,8 +1,16 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfh,+zvfh,+v -target-abi=ilp32d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32 -; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+zvfh,+v -target-abi=lp64d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64 +; RUN: llc -mtriple=riscv32 
-mattr=+m,+d,+zfh,+zvfh,+zvfbfmin,+v \ +; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \ +; RUN: --check-prefixes=CHECK,RV32 +; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+zvfh,+zvfbfmin,+v \ +; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \ +; RUN: --check-prefixes=CHECK,RV64 +; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfh,+zvfhmin,+zvfbfmin,+v \ +; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \ +; RUN: --check-prefixes=CHECK,RV32 +; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+zvfhmin,+zvfbfmin,+v \ +; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \ +; RUN: --check-prefixes=CHECK,RV64 declare void @llvm.masked.scatter.nxv1i8.nxv1p0(, , i32, ) @@ -967,6 +975,184 @@ define void @mscatter_baseidx_nxv8i64( %val, ptr %base, , , i32, ) + +define void @mscatter_nxv1bf16( %val, %ptrs, %m) { +; RV32-LABEL: mscatter_nxv1bf16: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; RV32-NEXT: vsoxei32.v v8, (zero), v9, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: mscatter_nxv1bf16: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; RV64-NEXT: vsoxei64.v v8, (zero), v9, v0.t +; RV64-NEXT: ret + call void @llvm.masked.scatter.nxv1bf16.nxv1p0( %val, %ptrs, i32 2, %m) + ret void +} + +declare void @llvm.masked.scatter.nxv2bf16.nxv2p0(, , i32, ) + +define void @mscatter_nxv2bf16( %val, %ptrs, %m) { +; RV32-LABEL: mscatter_nxv2bf16: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; RV32-NEXT: vsoxei32.v v8, (zero), v9, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: mscatter_nxv2bf16: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; RV64-NEXT: vsoxei64.v v8, (zero), v10, v0.t +; RV64-NEXT: ret + call void @llvm.masked.scatter.nxv2bf16.nxv2p0( %val, %ptrs, i32 2, %m) + ret void +} + +declare void @llvm.masked.scatter.nxv4bf16.nxv4p0(, , i32, ) + +define void @mscatter_nxv4bf16( %val, %ptrs, %m) { +; RV32-LABEL: mscatter_nxv4bf16: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; RV32-NEXT: vsoxei32.v v8, (zero), v10, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: mscatter_nxv4bf16: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; RV64-NEXT: vsoxei64.v v8, (zero), v12, v0.t +; RV64-NEXT: ret + call void @llvm.masked.scatter.nxv4bf16.nxv4p0( %val, %ptrs, i32 2, %m) + ret void +} + +define void @mscatter_truemask_nxv4bf16( %val, %ptrs) { +; RV32-LABEL: mscatter_truemask_nxv4bf16: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; RV32-NEXT: vsoxei32.v v8, (zero), v10 +; RV32-NEXT: ret +; +; RV64-LABEL: mscatter_truemask_nxv4bf16: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; RV64-NEXT: vsoxei64.v v8, (zero), v12 +; RV64-NEXT: ret + call void @llvm.masked.scatter.nxv4bf16.nxv4p0( %val, %ptrs, i32 2, splat (i1 1)) + ret void +} + +define void @mscatter_falsemask_nxv4bf16( %val, %ptrs) { +; CHECK-LABEL: mscatter_falsemask_nxv4bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: ret + call void @llvm.masked.scatter.nxv4bf16.nxv4p0( %val, %ptrs, i32 2, zeroinitializer) + ret void +} + +declare void @llvm.masked.scatter.nxv8bf16.nxv8p0(, , i32, ) + +define void @mscatter_nxv8bf16( %val, %ptrs, %m) { +; RV32-LABEL: mscatter_nxv8bf16: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; RV32-NEXT: vsoxei32.v v8, (zero), v12, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: mscatter_nxv8bf16: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; RV64-NEXT: vsoxei64.v v8, (zero), v16, v0.t +; 
RV64-NEXT: ret + call void @llvm.masked.scatter.nxv8bf16.nxv8p0( %val, %ptrs, i32 2, %m) + ret void +} + +define void @mscatter_baseidx_nxv8i8_nxv8bf16( %val, ptr %base, %idxs, %m) { +; RV32-LABEL: mscatter_baseidx_nxv8i8_nxv8bf16: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, ma +; RV32-NEXT: vsext.vf4 v12, v10 +; RV32-NEXT: vadd.vv v12, v12, v12 +; RV32-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: mscatter_baseidx_nxv8i8_nxv8bf16: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma +; RV64-NEXT: vsext.vf8 v16, v10 +; RV64-NEXT: vadd.vv v16, v16, v16 +; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t +; RV64-NEXT: ret + %ptrs = getelementptr inbounds bfloat, ptr %base, %idxs + call void @llvm.masked.scatter.nxv8bf16.nxv8p0( %val, %ptrs, i32 2, %m) + ret void +} + +define void @mscatter_baseidx_sext_nxv8i8_nxv8bf16( %val, ptr %base, %idxs, %m) { +; RV32-LABEL: mscatter_baseidx_sext_nxv8i8_nxv8bf16: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, ma +; RV32-NEXT: vsext.vf4 v12, v10 +; RV32-NEXT: vadd.vv v12, v12, v12 +; RV32-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: mscatter_baseidx_sext_nxv8i8_nxv8bf16: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma +; RV64-NEXT: vsext.vf8 v16, v10 +; RV64-NEXT: vadd.vv v16, v16, v16 +; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t +; RV64-NEXT: ret + %eidxs = sext %idxs to + %ptrs = getelementptr inbounds bfloat, ptr %base, %eidxs + call void @llvm.masked.scatter.nxv8bf16.nxv8p0( %val, %ptrs, i32 2, %m) + ret void +} + +define void @mscatter_baseidx_zext_nxv8i8_nxv8bf16( %val, ptr %base, %idxs, %m) { +; CHECK-LABEL: mscatter_baseidx_zext_nxv8i8_nxv8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma +; CHECK-NEXT: vwaddu.vv v12, v10, v10 +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; CHECK-NEXT: vsoxei16.v v8, (a0), v12, v0.t +; CHECK-NEXT: ret + %eidxs = zext %idxs to + %ptrs = getelementptr inbounds bfloat, ptr %base, %eidxs + call void @llvm.masked.scatter.nxv8bf16.nxv8p0( %val, %ptrs, i32 2, %m) + ret void +} + +define void @mscatter_baseidx_nxv8bf16( %val, ptr %base, %idxs, %m) { +; RV32-LABEL: mscatter_baseidx_nxv8bf16: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; RV32-NEXT: vwadd.vv v12, v10, v10 +; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: mscatter_baseidx_nxv8bf16: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma +; RV64-NEXT: vsext.vf4 v16, v10 +; RV64-NEXT: vadd.vv v16, v16, v16 +; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t +; RV64-NEXT: ret + %ptrs = getelementptr inbounds bfloat, ptr %base, %idxs + call void @llvm.masked.scatter.nxv8bf16.nxv8p0( %val, %ptrs, i32 2, %m) + ret void +} + declare void @llvm.masked.scatter.nxv1f16.nxv1p0(, , i32, ) define void @mscatter_nxv1f16( %val, %ptrs, %m) { diff --git a/llvm/test/CodeGen/RISCV/rvv/strided-vpload.ll b/llvm/test/CodeGen/RISCV/rvv/strided-vpload.ll index 14976f21b7dbb..87ff1859a4d2d 100644 --- a/llvm/test/CodeGen/RISCV/rvv/strided-vpload.ll +++ b/llvm/test/CodeGen/RISCV/rvv/strided-vpload.ll @@ -1,16 +1,28 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 
-mattr=+m,+d,+zfh,+v,+zvfh,+optimized-zero-stride-load \ +; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfh,+v,+zvfh,+zvfbfmin,+optimized-zero-stride-load \ ; RUN: -verify-machineinstrs < %s \ -; RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-RV32,CHECK-OPT,CHECK-OPT-RV32 -; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+v,+zvfh,+optimized-zero-stride-load \ +; RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-RV32,CHECK-OPT,CHECK-OPT-ZVFH,CHECK-OPT-RV32 +; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+v,+zvfh,+zvfbfmin,+optimized-zero-stride-load \ ; RUN: -verify-machineinstrs < %s \ -; RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-RV64,CHECK-OPT,CHECK-OPT-RV64 -; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfh,+v,+zvfh \ +; RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-RV64,CHECK-OPT,CHECK-OPT-ZVFH,CHECK-OPT-RV64 +; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfh,+v,+zvfh,+zvfbfmin \ ; RUN: -verify-machineinstrs < %s \ -; RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-RV32,CHECK-NO-OPT,CHECK-NO-OPT-RV32 -; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+v,+zvfh \ +; RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-RV32,CHECK-NO-OPT,CHECK-NO-OPT-ZVFH,CHECK-NO-OPT-RV32 +; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+v,+zvfh,+zvfbfmin \ ; RUN: -verify-machineinstrs < %s \ -; RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-RV64,CHECK-NO-OPT,CHECK-NO-OPT-RV64 +; RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-RV64,CHECK-NO-OPT,CHECK-NO-OPT-ZVFH,CHECK-NO-OPT-RV64 +; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfh,+v,+zvfhmin,+zvfbfmin,+optimized-zero-stride-load \ +; RUN: -verify-machineinstrs < %s \ +; RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-RV32,CHECK-OPT,CHECK-OPT-ZVFHMIN,CHECK-OPT-RV32 +; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+v,+zvfhmin,+zvfbfmin,+optimized-zero-stride-load \ +; RUN: -verify-machineinstrs < %s \ +; RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-RV64,CHECK-OPT,CHECK-OPT-ZVFHMIN,CHECK-OPT-RV64 +; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfh,+v,+zvfhmin,+zvfbfmin \ +; RUN: -verify-machineinstrs < %s \ +; RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-RV32,CHECK-NO-OPT,CHECK-NO-OPT-ZVFHMIN,CHECK-NO-OPT-RV32 +; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+v,+zvfhmin,+zvfbfmin \ +; RUN: -verify-machineinstrs < %s \ +; RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-RV64,CHECK-NO-OPT,CHECK-NO-OPT-ZVFHMIN,CHECK-NO-OPT-RV64 declare @llvm.experimental.vp.strided.load.nxv1i8.p0.i8(ptr, i8, , i32) @@ -352,6 +364,74 @@ define @strided_vpload_nxv8i64(ptr %ptr, i32 signext %stride, ret %load } +declare @llvm.experimental.vp.strided.load.nxv1bf16.p0.i32(ptr, i32, , i32) + +define @strided_vpload_nxv1bf16(ptr %ptr, i32 signext %stride, %m, i32 zeroext %evl) { +; CHECK-LABEL: strided_vpload_nxv1bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a2, e16, mf4, ta, ma +; CHECK-NEXT: vlse16.v v8, (a0), a1, v0.t +; CHECK-NEXT: ret + %load = call @llvm.experimental.vp.strided.load.nxv1bf16.p0.i32(ptr %ptr, i32 signext %stride, %m, i32 %evl) + ret %load +} + +declare @llvm.experimental.vp.strided.load.nxv2bf16.p0.i32(ptr, i32, , i32) + +define @strided_vpload_nxv2bf16(ptr %ptr, i32 signext %stride, %m, i32 zeroext %evl) { +; CHECK-LABEL: strided_vpload_nxv2bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a2, e16, mf2, ta, ma +; CHECK-NEXT: vlse16.v v8, (a0), a1, v0.t +; CHECK-NEXT: ret + %load = call @llvm.experimental.vp.strided.load.nxv2bf16.p0.i32(ptr %ptr, i32 signext %stride, %m, i32 %evl) + ret %load +} + +define @strided_vpload_nxv2bf16_allones_mask(ptr %ptr, i32 signext 
%stride, i32 zeroext %evl) { +; CHECK-LABEL: strided_vpload_nxv2bf16_allones_mask: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a2, e16, mf2, ta, ma +; CHECK-NEXT: vlse16.v v8, (a0), a1 +; CHECK-NEXT: ret + %load = call @llvm.experimental.vp.strided.load.nxv2bf16.p0.i32(ptr %ptr, i32 signext %stride, splat (i1 true), i32 %evl) + ret %load +} + +declare @llvm.experimental.vp.strided.load.nxv4bf16.p0.i32(ptr, i32, , i32) + +define @strided_vpload_nxv4bf16(ptr %ptr, i32 signext %stride, %m, i32 zeroext %evl) { +; CHECK-LABEL: strided_vpload_nxv4bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a2, e16, m1, ta, ma +; CHECK-NEXT: vlse16.v v8, (a0), a1, v0.t +; CHECK-NEXT: ret + %load = call @llvm.experimental.vp.strided.load.nxv4bf16.p0.i32(ptr %ptr, i32 signext %stride, %m, i32 %evl) + ret %load +} + +define @strided_vpload_nxv4bf16_unit_stride(ptr %ptr, %m, i32 zeroext %evl) { +; CHECK-LABEL: strided_vpload_nxv4bf16_unit_stride: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma +; CHECK-NEXT: vle16.v v8, (a0), v0.t +; CHECK-NEXT: ret + %load = call @llvm.experimental.vp.strided.load.nxv4bf16.p0.i32(ptr %ptr, i32 2, %m, i32 %evl) + ret %load +} + +declare @llvm.experimental.vp.strided.load.nxv8bf16.p0.i32(ptr, i32, , i32) + +define @strided_vpload_nxv8bf16(ptr %ptr, i32 signext %stride, %m, i32 zeroext %evl) { +; CHECK-LABEL: strided_vpload_nxv8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a2, e16, m2, ta, ma +; CHECK-NEXT: vlse16.v v8, (a0), a1, v0.t +; CHECK-NEXT: ret + %load = call @llvm.experimental.vp.strided.load.nxv8bf16.p0.i32(ptr %ptr, i32 signext %stride, %m, i32 %evl) + ret %load +} + declare @llvm.experimental.vp.strided.load.nxv1f16.p0.i32(ptr, i32, , i32) define @strided_vpload_nxv1f16(ptr %ptr, i32 signext %stride, %m, i32 zeroext %evl) { @@ -589,10 +669,10 @@ define @strided_load_nxv16f64(ptr %ptr, i64 %stride, @strided_load_nxv16f64(ptr %ptr, i64 %stride, @strided_load_nxv16f64_allones_mask(ptr %ptr, i64 ; CHECK-RV32-NEXT: sltu a5, a3, a2 ; CHECK-RV32-NEXT: addi a5, a5, -1 ; CHECK-RV32-NEXT: and a2, a5, a2 -; CHECK-RV32-NEXT: bltu a3, a4, .LBB50_2 +; CHECK-RV32-NEXT: bltu a3, a4, .LBB56_2 ; CHECK-RV32-NEXT: # %bb.1: ; CHECK-RV32-NEXT: mv a3, a4 -; CHECK-RV32-NEXT: .LBB50_2: +; CHECK-RV32-NEXT: .LBB56_2: ; CHECK-RV32-NEXT: mul a4, a3, a1 ; CHECK-RV32-NEXT: add a4, a0, a4 ; CHECK-RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma @@ -661,10 +741,10 @@ define @strided_load_nxv16f64_allones_mask(ptr %ptr, i64 ; CHECK-RV64-NEXT: sltu a5, a2, a3 ; CHECK-RV64-NEXT: addi a5, a5, -1 ; CHECK-RV64-NEXT: and a3, a5, a3 -; CHECK-RV64-NEXT: bltu a2, a4, .LBB50_2 +; CHECK-RV64-NEXT: bltu a2, a4, .LBB56_2 ; CHECK-RV64-NEXT: # %bb.1: ; CHECK-RV64-NEXT: mv a2, a4 -; CHECK-RV64-NEXT: .LBB50_2: +; CHECK-RV64-NEXT: .LBB56_2: ; CHECK-RV64-NEXT: mul a4, a2, a1 ; CHECK-RV64-NEXT: add a4, a0, a4 ; CHECK-RV64-NEXT: vsetvli zero, a3, e64, m8, ta, ma @@ -689,19 +769,19 @@ define @strided_load_nxv17f64(ptr %ptr, i64 %stride, @strided_load_nxv17f64(ptr %ptr, i64 %stride, @strided_load_nxv17f64(ptr %ptr, i64 %stride, @strided_load_nxv17f64(ptr %ptr, i64 %stride, @zero_strided_unmasked_vpload_nxv1f16(ptr %ptr) { ; CHECK-OPT-NEXT: vlse16.v v8, (a0), zero ; CHECK-OPT-NEXT: ret ; -; CHECK-NO-OPT-LABEL: zero_strided_unmasked_vpload_nxv1f16: -; CHECK-NO-OPT: # %bb.0: -; CHECK-NO-OPT-NEXT: flh fa5, 0(a0) -; CHECK-NO-OPT-NEXT: vsetivli zero, 4, e16, mf4, ta, ma -; CHECK-NO-OPT-NEXT: vfmv.v.f v8, fa5 -; CHECK-NO-OPT-NEXT: ret +; CHECK-NO-OPT-ZVFH-LABEL: 
zero_strided_unmasked_vpload_nxv1f16: +; CHECK-NO-OPT-ZVFH: # %bb.0: +; CHECK-NO-OPT-ZVFH-NEXT: flh fa5, 0(a0) +; CHECK-NO-OPT-ZVFH-NEXT: vsetivli zero, 4, e16, mf4, ta, ma +; CHECK-NO-OPT-ZVFH-NEXT: vfmv.v.f v8, fa5 +; CHECK-NO-OPT-ZVFH-NEXT: ret +; +; CHECK-NO-OPT-ZVFHMIN-LABEL: zero_strided_unmasked_vpload_nxv1f16: +; CHECK-NO-OPT-ZVFHMIN: # %bb.0: +; CHECK-NO-OPT-ZVFHMIN-NEXT: lh a0, 0(a0) +; CHECK-NO-OPT-ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf4, ta, ma +; CHECK-NO-OPT-ZVFHMIN-NEXT: vmv.v.x v8, a0 +; CHECK-NO-OPT-ZVFHMIN-NEXT: ret %load = call @llvm.experimental.vp.strided.load.nxv1f16.p0.i32(ptr %ptr, i32 0, splat (i1 true), i32 4) ret %load } @@ -854,10 +941,10 @@ define @zero_strided_vadd_nxv16i64( %v, p ; CHECK-RV32-NEXT: and a3, a4, a3 ; CHECK-RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma ; CHECK-RV32-NEXT: vlse64.v v24, (a0), zero -; CHECK-RV32-NEXT: bltu a2, a1, .LBB55_2 +; CHECK-RV32-NEXT: bltu a2, a1, .LBB61_2 ; CHECK-RV32-NEXT: # %bb.1: ; CHECK-RV32-NEXT: mv a2, a1 -; CHECK-RV32-NEXT: .LBB55_2: +; CHECK-RV32-NEXT: .LBB61_2: ; CHECK-RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; CHECK-RV32-NEXT: vlse64.v v0, (a0), zero ; CHECK-RV32-NEXT: vsetvli a0, zero, e64, m8, ta, ma @@ -908,3 +995,6 @@ define @zero_strided_vadd_nxv1p0( %v, ptr % %load = call @llvm.experimental.vp.strided.load.nxv1p0.p0.i32(ptr %ptr, i32 0, splat (i1 true), i32 %vscale) ret %load } +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; CHECK-OPT-ZVFH: {{.*}} +; CHECK-OPT-ZVFHMIN: {{.*}} diff --git a/llvm/test/CodeGen/RISCV/rvv/strided-vpstore.ll b/llvm/test/CodeGen/RISCV/rvv/strided-vpstore.ll index e8704b35f31f7..abdf9ab09bb9a 100644 --- a/llvm/test/CodeGen/RISCV/rvv/strided-vpstore.ll +++ b/llvm/test/CodeGen/RISCV/rvv/strided-vpstore.ll @@ -1,8 +1,14 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfh,+v,+zvfh \ +; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfh,+v,+zvfh,+zvfbfmin \ ; RUN: -verify-machineinstrs < %s | FileCheck %s \ ; RUN: -check-prefixes=CHECK,CHECK-RV32 -; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+v,+zvfh \ +; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+v,+zvfh,+zvfbfmin \ +; RUN: -verify-machineinstrs < %s | FileCheck %s \ +; RUN: -check-prefixes=CHECK,CHECK-RV64 +; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfh,+v,+zvfhmin,+zvfbfmin \ +; RUN: -verify-machineinstrs < %s | FileCheck %s \ +; RUN: -check-prefixes=CHECK,CHECK-RV32 +; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+v,+zvfhmin,+zvfbfmin \ ; RUN: -verify-machineinstrs < %s | FileCheck %s \ ; RUN: -check-prefixes=CHECK,CHECK-RV64 @@ -280,6 +286,64 @@ define void @strided_vpstore_nxv8i64( %val, ptr %ptr, i32 sign ret void } +declare void @llvm.experimental.vp.strided.store.nxv1bf16.p0.i32(, ptr, i32, , i32) + +define void @strided_vpstore_nxv1bf16( %val, ptr %ptr, i32 signext %strided, %m, i32 zeroext %evl) { +; CHECK-LABEL: strided_vpstore_nxv1bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a2, e16, mf4, ta, ma +; CHECK-NEXT: vsse16.v v8, (a0), a1, v0.t +; CHECK-NEXT: ret + call void @llvm.experimental.vp.strided.store.nxv1bf16.p0.i32( %val, ptr %ptr, i32 %strided, %m, i32 %evl) + ret void +} + +declare void @llvm.experimental.vp.strided.store.nxv2bf16.p0.i32(, ptr, i32, , i32) + +define void @strided_vpstore_nxv2bf16( %val, ptr %ptr, i32 signext %strided, %m, i32 zeroext %evl) { +; CHECK-LABEL: strided_vpstore_nxv2bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a2, e16, mf2, ta, ma +; CHECK-NEXT: 
vsse16.v v8, (a0), a1, v0.t +; CHECK-NEXT: ret + call void @llvm.experimental.vp.strided.store.nxv2bf16.p0.i32( %val, ptr %ptr, i32 %strided, %m, i32 %evl) + ret void +} + +declare void @llvm.experimental.vp.strided.store.nxv4bf16.p0.i32(, ptr, i32, , i32) + +define void @strided_vpstore_nxv4bf16( %val, ptr %ptr, i32 signext %strided, %m, i32 zeroext %evl) { +; CHECK-LABEL: strided_vpstore_nxv4bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a2, e16, m1, ta, ma +; CHECK-NEXT: vsse16.v v8, (a0), a1, v0.t +; CHECK-NEXT: ret + call void @llvm.experimental.vp.strided.store.nxv4bf16.p0.i32( %val, ptr %ptr, i32 %strided, %m, i32 %evl) + ret void +} + +define void @strided_vpstore_nxv4bf16_unit_stride( %val, ptr %ptr, %m, i32 zeroext %evl) { +; CHECK-LABEL: strided_vpstore_nxv4bf16_unit_stride: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma +; CHECK-NEXT: vse16.v v8, (a0), v0.t +; CHECK-NEXT: ret + call void @llvm.experimental.vp.strided.store.nxv4bf16.p0.i32( %val, ptr %ptr, i32 2, %m, i32 %evl) + ret void +} + +declare void @llvm.experimental.vp.strided.store.nxv8bf16.p0.i32(, ptr, i32, , i32) + +define void @strided_vpstore_nxv8bf16( %val, ptr %ptr, i32 signext %strided, %m, i32 zeroext %evl) { +; CHECK-LABEL: strided_vpstore_nxv8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a2, e16, m2, ta, ma +; CHECK-NEXT: vsse16.v v8, (a0), a1, v0.t +; CHECK-NEXT: ret + call void @llvm.experimental.vp.strided.store.nxv8bf16.p0.i32( %val, ptr %ptr, i32 %strided, %m, i32 %evl) + ret void +} + declare void @llvm.experimental.vp.strided.store.nxv1f16.p0.i32(, ptr, i32, , i32) define void @strided_vpstore_nxv1f16( %val, ptr %ptr, i32 signext %strided, %m, i32 zeroext %evl) { @@ -493,10 +557,10 @@ define void @strided_store_nxv16f64( %v, ptr %ptr, i32 sig ; CHECK: # %bb.0: ; CHECK-NEXT: csrr a3, vlenb ; CHECK-NEXT: mv a4, a2 -; CHECK-NEXT: bltu a2, a3, .LBB41_2 +; CHECK-NEXT: bltu a2, a3, .LBB46_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a4, a3 -; CHECK-NEXT: .LBB41_2: +; CHECK-NEXT: .LBB46_2: ; CHECK-NEXT: vsetvli zero, a4, e64, m8, ta, ma ; CHECK-NEXT: vsse64.v v8, (a0), a1, v0.t ; CHECK-NEXT: sub a5, a2, a3 @@ -520,10 +584,10 @@ define void @strided_store_nxv16f64_allones_mask( %v, ptr ; CHECK: # %bb.0: ; CHECK-NEXT: csrr a3, vlenb ; CHECK-NEXT: mv a4, a2 -; CHECK-NEXT: bltu a2, a3, .LBB42_2 +; CHECK-NEXT: bltu a2, a3, .LBB47_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a4, a3 -; CHECK-NEXT: .LBB42_2: +; CHECK-NEXT: .LBB47_2: ; CHECK-NEXT: vsetvli zero, a4, e64, m8, ta, ma ; CHECK-NEXT: vsse64.v v8, (a0), a1 ; CHECK-NEXT: sub a3, a2, a3 @@ -549,15 +613,15 @@ define void @strided_store_nxv17f64( %v, ptr %ptr, i32 sig ; CHECK-NEXT: slli a6, a4, 1 ; CHECK-NEXT: vmv1r.v v24, v0 ; CHECK-NEXT: mv a5, a3 -; CHECK-NEXT: bltu a3, a6, .LBB43_2 +; CHECK-NEXT: bltu a3, a6, .LBB48_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a5, a6 -; CHECK-NEXT: .LBB43_2: +; CHECK-NEXT: .LBB48_2: ; CHECK-NEXT: mv a7, a5 -; CHECK-NEXT: bltu a5, a4, .LBB43_4 +; CHECK-NEXT: bltu a5, a4, .LBB48_4 ; CHECK-NEXT: # %bb.3: ; CHECK-NEXT: mv a7, a4 -; CHECK-NEXT: .LBB43_4: +; CHECK-NEXT: .LBB48_4: ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr t0, vlenb @@ -585,10 +649,10 @@ define void @strided_store_nxv17f64( %v, ptr %ptr, i32 sig ; CHECK-NEXT: and a0, a3, a0 ; CHECK-NEXT: vsetvli zero, t0, e64, m8, ta, ma ; CHECK-NEXT: vsse64.v v16, (a7), a2, v0.t -; CHECK-NEXT: bltu a0, a4, .LBB43_6 +; CHECK-NEXT: bltu a0, a4, .LBB48_6 ; CHECK-NEXT: # %bb.5: ; CHECK-NEXT: mv a0, a4 -; 
CHECK-NEXT: .LBB43_6: +; CHECK-NEXT: .LBB48_6: ; CHECK-NEXT: mul a3, a5, a2 ; CHECK-NEXT: srli a4, a4, 2 ; CHECK-NEXT: vsetvli a5, zero, e8, mf2, ta, ma diff --git a/llvm/test/CodeGen/RISCV/rvv/vp-splat.ll b/llvm/test/CodeGen/RISCV/rvv/vp-splat.ll index 5fbdefda9f402..0da05c1bd4364 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vp-splat.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vp-splat.ll @@ -1,6 +1,10 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 -; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh,+zvfh -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32 -; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh,+zvfh -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64 +; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zvfh,+zvfbfmin -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,NOZFMIN,ZVFH +; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zvfh,+zvfbfmin -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,NOZFMIN,ZVFH +; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zvfhmin,+zvfbfmin -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,NOZFMIN,ZVFHMIN +; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zvfhmin,+zvfbfmin -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,NOZFMIN,ZVFHMIN +; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfhmin,+zfbfmin,+zvfhmin,+zvfbfmin -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,ZFMIN +; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfhmin,+zfbfmin,+zvfhmin,+zvfbfmin -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,ZFMIN define @vp_splat_nxv1i8(i8 %val, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_splat_nxv1i8: @@ -270,62 +274,254 @@ define @vp_splat_nxv8i64(i64 %val, %m, i32 ret %splat } +define @vp_splat_nxv1bf16(bfloat %val, %m, i32 zeroext %evl) { +; NOZFMIN-LABEL: vp_splat_nxv1bf16: +; NOZFMIN: # %bb.0: +; NOZFMIN-NEXT: fmv.x.w a1, fa0 +; NOZFMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; NOZFMIN-NEXT: vmv.v.x v8, a1 +; NOZFMIN-NEXT: ret +; +; ZFMIN-LABEL: vp_splat_nxv1bf16: +; ZFMIN: # %bb.0: +; ZFMIN-NEXT: fmv.x.h a1, fa0 +; ZFMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; ZFMIN-NEXT: vmv.v.x v8, a1 +; ZFMIN-NEXT: ret + %splat = call @llvm.experimental.vp.splat.nxv1bf16(bfloat %val, %m, i32 %evl) + ret %splat +} + +define @vp_splat_nxv2bf16(bfloat %val, %m, i32 zeroext %evl) { +; NOZFMIN-LABEL: vp_splat_nxv2bf16: +; NOZFMIN: # %bb.0: +; NOZFMIN-NEXT: fmv.x.w a1, fa0 +; NOZFMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; NOZFMIN-NEXT: vmv.v.x v8, a1 +; NOZFMIN-NEXT: ret +; +; ZFMIN-LABEL: vp_splat_nxv2bf16: +; ZFMIN: # %bb.0: +; ZFMIN-NEXT: fmv.x.h a1, fa0 +; ZFMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; ZFMIN-NEXT: vmv.v.x v8, a1 +; ZFMIN-NEXT: ret + %splat = call @llvm.experimental.vp.splat.nxv2bf16(bfloat %val, %m, i32 %evl) + ret %splat +} + +define @vp_splat_nxv4bf16(bfloat %val, %m, i32 zeroext %evl) { +; NOZFMIN-LABEL: vp_splat_nxv4bf16: +; NOZFMIN: # %bb.0: +; NOZFMIN-NEXT: fmv.x.w a1, fa0 +; NOZFMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; NOZFMIN-NEXT: vmv.v.x v8, a1 +; NOZFMIN-NEXT: ret +; +; ZFMIN-LABEL: vp_splat_nxv4bf16: +; ZFMIN: # %bb.0: +; ZFMIN-NEXT: fmv.x.h a1, fa0 +; ZFMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZFMIN-NEXT: vmv.v.x v8, a1 +; ZFMIN-NEXT: ret + %splat = call @llvm.experimental.vp.splat.nxv4bf16(bfloat %val, %m, i32 %evl) + ret %splat +} + +define @vp_splat_nxv8bf16(bfloat %val, %m, i32 zeroext %evl) { +; NOZFMIN-LABEL: vp_splat_nxv8bf16: +; NOZFMIN: # %bb.0: +; 
NOZFMIN-NEXT: fmv.x.w a1, fa0 +; NOZFMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; NOZFMIN-NEXT: vmv.v.x v8, a1 +; NOZFMIN-NEXT: ret +; +; ZFMIN-LABEL: vp_splat_nxv8bf16: +; ZFMIN: # %bb.0: +; ZFMIN-NEXT: fmv.x.h a1, fa0 +; ZFMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; ZFMIN-NEXT: vmv.v.x v8, a1 +; ZFMIN-NEXT: ret + %splat = call @llvm.experimental.vp.splat.nxv8bf16(bfloat %val, %m, i32 %evl) + ret %splat +} + +define @vp_splat_nxv16bf16(bfloat %val, %m, i32 zeroext %evl) { +; NOZFMIN-LABEL: vp_splat_nxv16bf16: +; NOZFMIN: # %bb.0: +; NOZFMIN-NEXT: fmv.x.w a1, fa0 +; NOZFMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; NOZFMIN-NEXT: vmv.v.x v8, a1 +; NOZFMIN-NEXT: ret +; +; ZFMIN-LABEL: vp_splat_nxv16bf16: +; ZFMIN: # %bb.0: +; ZFMIN-NEXT: fmv.x.h a1, fa0 +; ZFMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; ZFMIN-NEXT: vmv.v.x v8, a1 +; ZFMIN-NEXT: ret + %splat = call @llvm.experimental.vp.splat.nxv16bf16(bfloat %val, %m, i32 %evl) + ret %splat +} + +define @vp_splat_nxv32bf16(bfloat %val, %m, i32 zeroext %evl) { +; NOZFMIN-LABEL: vp_splat_nxv32bf16: +; NOZFMIN: # %bb.0: +; NOZFMIN-NEXT: fmv.x.w a1, fa0 +; NOZFMIN-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; NOZFMIN-NEXT: vmv.v.x v8, a1 +; NOZFMIN-NEXT: ret +; +; ZFMIN-LABEL: vp_splat_nxv32bf16: +; ZFMIN: # %bb.0: +; ZFMIN-NEXT: fmv.x.h a1, fa0 +; ZFMIN-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; ZFMIN-NEXT: vmv.v.x v8, a1 +; ZFMIN-NEXT: ret + %splat = call @llvm.experimental.vp.splat.nxv32bf16(bfloat %val, %m, i32 %evl) + ret %splat +} + define @vp_splat_nxv1f16(half %val, %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_splat_nxv1f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma -; CHECK-NEXT: vfmv.v.f v8, fa0 -; CHECK-NEXT: ret +; ZVFH-LABEL: vp_splat_nxv1f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; ZVFH-NEXT: vfmv.v.f v8, fa0 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vp_splat_nxv1f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: fmv.x.w a1, fa0 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vmv.v.x v8, a1 +; ZVFHMIN-NEXT: ret +; +; ZFMIN-LABEL: vp_splat_nxv1f16: +; ZFMIN: # %bb.0: +; ZFMIN-NEXT: fmv.x.h a1, fa0 +; ZFMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; ZFMIN-NEXT: vmv.v.x v8, a1 +; ZFMIN-NEXT: ret %splat = call @llvm.experimental.vp.splat.nxv1f16(half %val, %m, i32 %evl) ret %splat } define @vp_splat_nxv2f16(half %val, %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_splat_nxv2f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma -; CHECK-NEXT: vfmv.v.f v8, fa0 -; CHECK-NEXT: ret +; ZVFH-LABEL: vp_splat_nxv2f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; ZVFH-NEXT: vfmv.v.f v8, fa0 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vp_splat_nxv2f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: fmv.x.w a1, fa0 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vmv.v.x v8, a1 +; ZVFHMIN-NEXT: ret +; +; ZFMIN-LABEL: vp_splat_nxv2f16: +; ZFMIN: # %bb.0: +; ZFMIN-NEXT: fmv.x.h a1, fa0 +; ZFMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; ZFMIN-NEXT: vmv.v.x v8, a1 +; ZFMIN-NEXT: ret %splat = call @llvm.experimental.vp.splat.nxv2f16(half %val, %m, i32 %evl) ret %splat } define @vp_splat_nxv4f16(half %val, %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_splat_nxv4f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma -; CHECK-NEXT: vfmv.v.f v8, fa0 -; CHECK-NEXT: ret +; ZVFH-LABEL: vp_splat_nxv4f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFH-NEXT: vfmv.v.f v8, fa0 +; ZVFH-NEXT: ret +; +; 
ZVFHMIN-LABEL: vp_splat_nxv4f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: fmv.x.w a1, fa0 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFHMIN-NEXT: vmv.v.x v8, a1 +; ZVFHMIN-NEXT: ret +; +; ZFMIN-LABEL: vp_splat_nxv4f16: +; ZFMIN: # %bb.0: +; ZFMIN-NEXT: fmv.x.h a1, fa0 +; ZFMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZFMIN-NEXT: vmv.v.x v8, a1 +; ZFMIN-NEXT: ret %splat = call @llvm.experimental.vp.splat.nxv4f16(half %val, %m, i32 %evl) ret %splat } define @vp_splat_nxv8f16(half %val, %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_splat_nxv8f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; CHECK-NEXT: vfmv.v.f v8, fa0 -; CHECK-NEXT: ret +; ZVFH-LABEL: vp_splat_nxv8f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; ZVFH-NEXT: vfmv.v.f v8, fa0 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vp_splat_nxv8f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: fmv.x.w a1, fa0 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; ZVFHMIN-NEXT: vmv.v.x v8, a1 +; ZVFHMIN-NEXT: ret +; +; ZFMIN-LABEL: vp_splat_nxv8f16: +; ZFMIN: # %bb.0: +; ZFMIN-NEXT: fmv.x.h a1, fa0 +; ZFMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; ZFMIN-NEXT: vmv.v.x v8, a1 +; ZFMIN-NEXT: ret %splat = call @llvm.experimental.vp.splat.nxv8f16(half %val, %m, i32 %evl) ret %splat } define @vp_splat_nxv16f16(half %val, %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_splat_nxv16f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; CHECK-NEXT: vfmv.v.f v8, fa0 -; CHECK-NEXT: ret +; ZVFH-LABEL: vp_splat_nxv16f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; ZVFH-NEXT: vfmv.v.f v8, fa0 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vp_splat_nxv16f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: fmv.x.w a1, fa0 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; ZVFHMIN-NEXT: vmv.v.x v8, a1 +; ZVFHMIN-NEXT: ret +; +; ZFMIN-LABEL: vp_splat_nxv16f16: +; ZFMIN: # %bb.0: +; ZFMIN-NEXT: fmv.x.h a1, fa0 +; ZFMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; ZFMIN-NEXT: vmv.v.x v8, a1 +; ZFMIN-NEXT: ret %splat = call @llvm.experimental.vp.splat.nxv16f16(half %val, %m, i32 %evl) ret %splat } define @vp_splat_nxv32f16(half %val, %m, i32 zeroext %evl) { -; CHECK-LABEL: vp_splat_nxv32f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma -; CHECK-NEXT: vfmv.v.f v8, fa0 -; CHECK-NEXT: ret +; ZVFH-LABEL: vp_splat_nxv32f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; ZVFH-NEXT: vfmv.v.f v8, fa0 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vp_splat_nxv32f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: fmv.x.w a1, fa0 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; ZVFHMIN-NEXT: vmv.v.x v8, a1 +; ZVFHMIN-NEXT: ret +; +; ZFMIN-LABEL: vp_splat_nxv32f16: +; ZFMIN: # %bb.0: +; ZFMIN-NEXT: fmv.x.h a1, fa0 +; ZFMIN-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; ZFMIN-NEXT: vmv.v.x v8, a1 +; ZFMIN-NEXT: ret %splat = call @llvm.experimental.vp.splat.nxv32f16(half %val, %m, i32 %evl) ret %splat } @@ -452,10 +648,10 @@ define @vp_splat_nxv32i32(i32 %val, %m, i ; CHECK-NEXT: and a3, a4, a3 ; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma ; CHECK-NEXT: vmv.v.x v16, a0 -; CHECK-NEXT: bltu a1, a2, .LBB39_2 +; CHECK-NEXT: bltu a1, a2, .LBB45_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a1, a2 -; CHECK-NEXT: .LBB39_2: +; CHECK-NEXT: .LBB45_2: ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; CHECK-NEXT: vmv.v.x v8, a0 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/vpgather-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vpgather-sdnode.ll index c0d7ecf74956b..84c8321b5b934 
100644 --- a/llvm/test/CodeGen/RISCV/rvv/vpgather-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vpgather-sdnode.ll @@ -1,7 +1,11 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v \ +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+zvfbfmin,+v \ ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV32 -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v \ +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+zvfbfmin,+v \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV64 +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfhmin,+zvfbfmin,+v \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV32 +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfhmin,+zvfbfmin,+v \ ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV64 declare @llvm.vp.gather.nxv1i8.nxv1p0(, , i32) @@ -1237,6 +1241,195 @@ define @vpgather_baseidx_nxv8i64(ptr %base, %v } +declare @llvm.vp.gather.nxv1bf16.nxv1p0(, , i32) + +define @vpgather_nxv1bf16( %ptrs, %m, i32 zeroext %evl) { +; RV32-LABEL: vpgather_nxv1bf16: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; RV32-NEXT: vluxei32.v v9, (zero), v8, v0.t +; RV32-NEXT: vmv1r.v v8, v9 +; RV32-NEXT: ret +; +; RV64-LABEL: vpgather_nxv1bf16: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; RV64-NEXT: vluxei64.v v9, (zero), v8, v0.t +; RV64-NEXT: vmv1r.v v8, v9 +; RV64-NEXT: ret + %v = call @llvm.vp.gather.nxv1bf16.nxv1p0( %ptrs, %m, i32 %evl) + ret %v +} + +declare @llvm.vp.gather.nxv2bf16.nxv2p0(, , i32) + +define @vpgather_nxv2bf16( %ptrs, %m, i32 zeroext %evl) { +; RV32-LABEL: vpgather_nxv2bf16: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; RV32-NEXT: vluxei32.v v9, (zero), v8, v0.t +; RV32-NEXT: vmv1r.v v8, v9 +; RV32-NEXT: ret +; +; RV64-LABEL: vpgather_nxv2bf16: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; RV64-NEXT: vluxei64.v v10, (zero), v8, v0.t +; RV64-NEXT: vmv1r.v v8, v10 +; RV64-NEXT: ret + %v = call @llvm.vp.gather.nxv2bf16.nxv2p0( %ptrs, %m, i32 %evl) + ret %v +} + +declare @llvm.vp.gather.nxv4bf16.nxv4p0(, , i32) + +define @vpgather_nxv4bf16( %ptrs, %m, i32 zeroext %evl) { +; RV32-LABEL: vpgather_nxv4bf16: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; RV32-NEXT: vluxei32.v v10, (zero), v8, v0.t +; RV32-NEXT: vmv.v.v v8, v10 +; RV32-NEXT: ret +; +; RV64-LABEL: vpgather_nxv4bf16: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; RV64-NEXT: vluxei64.v v12, (zero), v8, v0.t +; RV64-NEXT: vmv.v.v v8, v12 +; RV64-NEXT: ret + %v = call @llvm.vp.gather.nxv4bf16.nxv4p0( %ptrs, %m, i32 %evl) + ret %v +} + +define @vpgather_truemask_nxv4bf16( %ptrs, i32 zeroext %evl) { +; RV32-LABEL: vpgather_truemask_nxv4bf16: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; RV32-NEXT: vluxei32.v v10, (zero), v8 +; RV32-NEXT: vmv.v.v v8, v10 +; RV32-NEXT: ret +; +; RV64-LABEL: vpgather_truemask_nxv4bf16: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; RV64-NEXT: vluxei64.v v12, (zero), v8 +; RV64-NEXT: vmv.v.v v8, v12 +; RV64-NEXT: ret + %v = call @llvm.vp.gather.nxv4bf16.nxv4p0( %ptrs, splat (i1 1), i32 %evl) + ret %v +} + +declare @llvm.vp.gather.nxv8bf16.nxv8p0(, , i32) + +define @vpgather_nxv8bf16( %ptrs, %m, i32 zeroext %evl) { +; RV32-LABEL: vpgather_nxv8bf16: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; RV32-NEXT: vluxei32.v v12, (zero), v8, v0.t +; 
RV32-NEXT: vmv.v.v v8, v12 +; RV32-NEXT: ret +; +; RV64-LABEL: vpgather_nxv8bf16: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; RV64-NEXT: vluxei64.v v16, (zero), v8, v0.t +; RV64-NEXT: vmv.v.v v8, v16 +; RV64-NEXT: ret + %v = call @llvm.vp.gather.nxv8bf16.nxv8p0( %ptrs, %m, i32 %evl) + ret %v +} + +define @vpgather_baseidx_nxv8i8_nxv8bf16(ptr %base, %idxs, %m, i32 zeroext %evl) { +; RV32-LABEL: vpgather_baseidx_nxv8i8_nxv8bf16: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma +; RV32-NEXT: vsext.vf4 v12, v8 +; RV32-NEXT: vadd.vv v12, v12, v12 +; RV32-NEXT: vsetvli zero, a1, e16, m2, ta, ma +; RV32-NEXT: vluxei32.v v8, (a0), v12, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: vpgather_baseidx_nxv8i8_nxv8bf16: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma +; RV64-NEXT: vsext.vf8 v16, v8 +; RV64-NEXT: vadd.vv v16, v16, v16 +; RV64-NEXT: vsetvli zero, a1, e16, m2, ta, ma +; RV64-NEXT: vluxei64.v v8, (a0), v16, v0.t +; RV64-NEXT: ret + %ptrs = getelementptr inbounds bfloat, ptr %base, %idxs + %v = call @llvm.vp.gather.nxv8bf16.nxv8p0( %ptrs, %m, i32 %evl) + ret %v +} + +define @vpgather_baseidx_sext_nxv8i8_nxv8bf16(ptr %base, %idxs, %m, i32 zeroext %evl) { +; RV32-LABEL: vpgather_baseidx_sext_nxv8i8_nxv8bf16: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma +; RV32-NEXT: vsext.vf4 v12, v8 +; RV32-NEXT: vadd.vv v12, v12, v12 +; RV32-NEXT: vsetvli zero, a1, e16, m2, ta, ma +; RV32-NEXT: vluxei32.v v8, (a0), v12, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: vpgather_baseidx_sext_nxv8i8_nxv8bf16: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma +; RV64-NEXT: vsext.vf8 v16, v8 +; RV64-NEXT: vadd.vv v16, v16, v16 +; RV64-NEXT: vsetvli zero, a1, e16, m2, ta, ma +; RV64-NEXT: vluxei64.v v8, (a0), v16, v0.t +; RV64-NEXT: ret + %eidxs = sext %idxs to + %ptrs = getelementptr inbounds bfloat, ptr %base, %eidxs + %v = call @llvm.vp.gather.nxv8bf16.nxv8p0( %ptrs, %m, i32 %evl) + ret %v +} + +define @vpgather_baseidx_zext_nxv8i8_nxv8bf16(ptr %base, %idxs, %m, i32 zeroext %evl) { +; RV32-LABEL: vpgather_baseidx_zext_nxv8i8_nxv8bf16: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a2, zero, e8, m1, ta, ma +; RV32-NEXT: vwaddu.vv v10, v8, v8 +; RV32-NEXT: vsetvli zero, a1, e16, m2, ta, ma +; RV32-NEXT: vluxei16.v v8, (a0), v10, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: vpgather_baseidx_zext_nxv8i8_nxv8bf16: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a2, zero, e8, m1, ta, ma +; RV64-NEXT: vwaddu.vv v10, v8, v8 +; RV64-NEXT: vsetvli zero, a1, e16, m2, ta, ma +; RV64-NEXT: vluxei16.v v8, (a0), v10, v0.t +; RV64-NEXT: ret + %eidxs = zext %idxs to + %ptrs = getelementptr inbounds bfloat, ptr %base, %eidxs + %v = call @llvm.vp.gather.nxv8bf16.nxv8p0( %ptrs, %m, i32 %evl) + ret %v +} + +define @vpgather_baseidx_nxv8bf16(ptr %base, %idxs, %m, i32 zeroext %evl) { +; RV32-LABEL: vpgather_baseidx_nxv8bf16: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a2, zero, e16, m2, ta, ma +; RV32-NEXT: vwadd.vv v12, v8, v8 +; RV32-NEXT: vsetvli zero, a1, e16, m2, ta, ma +; RV32-NEXT: vluxei32.v v8, (a0), v12, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: vpgather_baseidx_nxv8bf16: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma +; RV64-NEXT: vsext.vf4 v16, v8 +; RV64-NEXT: vadd.vv v16, v16, v16 +; RV64-NEXT: vsetvli zero, a1, e16, m2, ta, ma +; RV64-NEXT: vluxei64.v v8, (a0), v16, v0.t +; RV64-NEXT: ret + %ptrs = getelementptr inbounds bfloat, ptr %base, %idxs + %v = call @llvm.vp.gather.nxv8bf16.nxv8p0( %ptrs, %m, i32 %evl) + ret %v +} + declare 
@llvm.vp.gather.nxv1f16.nxv1p0(, , i32) define @vpgather_nxv1f16( %ptrs, %m, i32 zeroext %evl) { @@ -2275,10 +2468,10 @@ define @vpgather_nxv16f64( %ptrs, @vpgather_nxv16f64( %ptrs, @vpgather_baseidx_nxv16i16_nxv16f64(ptr %base, @vpgather_baseidx_nxv16i16_nxv16f64(ptr %base, @vpgather_baseidx_sext_nxv16i16_nxv16f64(ptr %base ; RV32-NEXT: and a3, a4, a3 ; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma ; RV32-NEXT: vluxei32.v v16, (a0), v28, v0.t -; RV32-NEXT: bltu a1, a2, .LBB104_2 +; RV32-NEXT: bltu a1, a2, .LBB113_2 ; RV32-NEXT: # %bb.1: ; RV32-NEXT: mv a1, a2 -; RV32-NEXT: .LBB104_2: +; RV32-NEXT: .LBB113_2: ; RV32-NEXT: vmv1r.v v0, v12 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: vluxei32.v v8, (a0), v24, v0.t @@ -2413,10 +2606,10 @@ define @vpgather_baseidx_sext_nxv16i16_nxv16f64(ptr %base ; RV64-NEXT: vsetvli a3, zero, e64, m8, ta, ma ; RV64-NEXT: vsext.vf4 v24, v8 ; RV64-NEXT: vsll.vi v24, v24, 3 -; RV64-NEXT: bltu a1, a2, .LBB104_2 +; RV64-NEXT: bltu a1, a2, .LBB113_2 ; RV64-NEXT: # %bb.1: ; RV64-NEXT: mv a1, a2 -; RV64-NEXT: .LBB104_2: +; RV64-NEXT: .LBB113_2: ; RV64-NEXT: vmv1r.v v0, v12 ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vluxei64.v v8, (a0), v24, v0.t @@ -2444,10 +2637,10 @@ define @vpgather_baseidx_zext_nxv16i16_nxv16f64(ptr %base ; RV32-NEXT: and a3, a4, a3 ; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma ; RV32-NEXT: vluxei32.v v16, (a0), v28, v0.t -; RV32-NEXT: bltu a1, a2, .LBB105_2 +; RV32-NEXT: bltu a1, a2, .LBB114_2 ; RV32-NEXT: # %bb.1: ; RV32-NEXT: mv a1, a2 -; RV32-NEXT: .LBB105_2: +; RV32-NEXT: .LBB114_2: ; RV32-NEXT: vmv1r.v v0, v12 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: vluxei32.v v8, (a0), v24, v0.t @@ -2469,10 +2662,10 @@ define @vpgather_baseidx_zext_nxv16i16_nxv16f64(ptr %base ; RV64-NEXT: and a3, a4, a3 ; RV64-NEXT: vsetvli zero, a3, e64, m8, ta, ma ; RV64-NEXT: vluxei32.v v16, (a0), v28, v0.t -; RV64-NEXT: bltu a1, a2, .LBB105_2 +; RV64-NEXT: bltu a1, a2, .LBB114_2 ; RV64-NEXT: # %bb.1: ; RV64-NEXT: mv a1, a2 -; RV64-NEXT: .LBB105_2: +; RV64-NEXT: .LBB114_2: ; RV64-NEXT: vmv1r.v v0, v12 ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vluxei32.v v8, (a0), v24, v0.t diff --git a/llvm/test/CodeGen/RISCV/rvv/vpload.ll b/llvm/test/CodeGen/RISCV/rvv/vpload.ll index d4f117fad37ee..0a98b672fb19c 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vpload.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vpload.ll @@ -1,7 +1,11 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v \ +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+zvfbfmin,+v \ ; RUN: -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v \ +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+zvfbfmin,+v \ +; RUN: -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfhmin,+zvfbfmin,+v \ +; RUN: -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfhmin,+zvfbfmin,+v \ ; RUN: -verify-machineinstrs < %s | FileCheck %s declare @llvm.vp.load.nxv1i8.p0(ptr, , i32) @@ -269,6 +273,64 @@ define @vpload_nxv8i64(ptr %ptr, %m, i32 ze ret %load } +declare @llvm.vp.load.nxv1bf16.p0(ptr, , i32) + +define @vpload_nxv1bf16(ptr %ptr, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpload_nxv1bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma +; CHECK-NEXT: vle16.v v8, (a0), v0.t +; CHECK-NEXT: ret + %load = call @llvm.vp.load.nxv1bf16.p0(ptr %ptr, %m, i32 %evl) + ret %load 
+} + +declare @llvm.vp.load.nxv2bf16.p0(ptr, , i32) + +define @vpload_nxv2bf16(ptr %ptr, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpload_nxv2bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma +; CHECK-NEXT: vle16.v v8, (a0), v0.t +; CHECK-NEXT: ret + %load = call @llvm.vp.load.nxv2bf16.p0(ptr %ptr, %m, i32 %evl) + ret %load +} + +define @vpload_nxv2bf16_allones_mask(ptr %ptr, i32 zeroext %evl) { +; CHECK-LABEL: vpload_nxv2bf16_allones_mask: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: ret + %load = call @llvm.vp.load.nxv2bf16.p0(ptr %ptr, splat (i1 true), i32 %evl) + ret %load +} + +declare @llvm.vp.load.nxv4bf16.p0(ptr, , i32) + +define @vpload_nxv4bf16(ptr %ptr, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpload_nxv4bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma +; CHECK-NEXT: vle16.v v8, (a0), v0.t +; CHECK-NEXT: ret + %load = call @llvm.vp.load.nxv4bf16.p0(ptr %ptr, %m, i32 %evl) + ret %load +} + +declare @llvm.vp.load.nxv8bf16.p0(ptr, , i32) + +define @vpload_nxv8bf16(ptr %ptr, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpload_nxv8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma +; CHECK-NEXT: vle16.v v8, (a0), v0.t +; CHECK-NEXT: ret + %load = call @llvm.vp.load.nxv8bf16.p0(ptr %ptr, %m, i32 %evl) + ret %load +} + declare @llvm.vp.load.nxv1f16.p0(ptr, , i32) define @vpload_nxv1f16(ptr %ptr, %m, i32 zeroext %evl) { @@ -461,10 +523,10 @@ define @vpload_nxv16f64(ptr %ptr, %m, ; CHECK-NEXT: add a4, a0, a4 ; CHECK-NEXT: vsetvli zero, a3, e64, m8, ta, ma ; CHECK-NEXT: vle64.v v16, (a4), v0.t -; CHECK-NEXT: bltu a1, a2, .LBB38_2 +; CHECK-NEXT: bltu a1, a2, .LBB43_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a1, a2 -; CHECK-NEXT: .LBB38_2: +; CHECK-NEXT: .LBB43_2: ; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; CHECK-NEXT: vle64.v v8, (a0), v0.t @@ -491,10 +553,10 @@ define @vpload_nxv17f64(ptr %ptr, ptr %out, @vpload_nxv17f64(ptr %ptr, ptr %out, @vpload_nxv17f64(ptr %ptr, ptr %out, , , , i32) @@ -1106,6 +1110,185 @@ define void @vpscatter_baseidx_nxv8i64( %val, ptr %base, , , , i32) + +define void @vpscatter_nxv1bf16( %val, %ptrs, %m, i32 zeroext %evl) { +; RV32-LABEL: vpscatter_nxv1bf16: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; RV32-NEXT: vsoxei32.v v8, (zero), v9, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: vpscatter_nxv1bf16: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; RV64-NEXT: vsoxei64.v v8, (zero), v9, v0.t +; RV64-NEXT: ret + call void @llvm.vp.scatter.nxv1bf16.nxv1p0( %val, %ptrs, %m, i32 %evl) + ret void +} + +declare void @llvm.vp.scatter.nxv2bf16.nxv2p0(, , , i32) + +define void @vpscatter_nxv2bf16( %val, %ptrs, %m, i32 zeroext %evl) { +; RV32-LABEL: vpscatter_nxv2bf16: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; RV32-NEXT: vsoxei32.v v8, (zero), v9, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: vpscatter_nxv2bf16: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; RV64-NEXT: vsoxei64.v v8, (zero), v10, v0.t +; RV64-NEXT: ret + call void @llvm.vp.scatter.nxv2bf16.nxv2p0( %val, %ptrs, %m, i32 %evl) + ret void +} + +declare void @llvm.vp.scatter.nxv4bf16.nxv4p0(, , , i32) + +define void @vpscatter_nxv4bf16( %val, %ptrs, %m, i32 zeroext %evl) { +; RV32-LABEL: vpscatter_nxv4bf16: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; RV32-NEXT: vsoxei32.v v8, (zero), v10, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: 
vpscatter_nxv4bf16: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; RV64-NEXT: vsoxei64.v v8, (zero), v12, v0.t +; RV64-NEXT: ret + call void @llvm.vp.scatter.nxv4bf16.nxv4p0( %val, %ptrs, %m, i32 %evl) + ret void +} + +define void @vpscatter_truemask_nxv4bf16( %val, %ptrs, i32 zeroext %evl) { +; RV32-LABEL: vpscatter_truemask_nxv4bf16: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; RV32-NEXT: vsoxei32.v v8, (zero), v10 +; RV32-NEXT: ret +; +; RV64-LABEL: vpscatter_truemask_nxv4bf16: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; RV64-NEXT: vsoxei64.v v8, (zero), v12 +; RV64-NEXT: ret + call void @llvm.vp.scatter.nxv4bf16.nxv4p0( %val, %ptrs, splat (i1 1), i32 %evl) + ret void +} + +declare void @llvm.vp.scatter.nxv8bf16.nxv8p0(, , , i32) + +define void @vpscatter_nxv8bf16( %val, %ptrs, %m, i32 zeroext %evl) { +; RV32-LABEL: vpscatter_nxv8bf16: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; RV32-NEXT: vsoxei32.v v8, (zero), v12, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: vpscatter_nxv8bf16: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; RV64-NEXT: vsoxei64.v v8, (zero), v16, v0.t +; RV64-NEXT: ret + call void @llvm.vp.scatter.nxv8bf16.nxv8p0( %val, %ptrs, %m, i32 %evl) + ret void +} + +define void @vpscatter_baseidx_nxv8i8_nxv8bf16( %val, ptr %base, %idxs, %m, i32 zeroext %evl) { +; RV32-LABEL: vpscatter_baseidx_nxv8i8_nxv8bf16: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma +; RV32-NEXT: vsext.vf4 v12, v10 +; RV32-NEXT: vadd.vv v12, v12, v12 +; RV32-NEXT: vsetvli zero, a1, e16, m2, ta, ma +; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: vpscatter_baseidx_nxv8i8_nxv8bf16: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma +; RV64-NEXT: vsext.vf8 v16, v10 +; RV64-NEXT: vadd.vv v16, v16, v16 +; RV64-NEXT: vsetvli zero, a1, e16, m2, ta, ma +; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t +; RV64-NEXT: ret + %ptrs = getelementptr inbounds bfloat, ptr %base, %idxs + call void @llvm.vp.scatter.nxv8bf16.nxv8p0( %val, %ptrs, %m, i32 %evl) + ret void +} + +define void @vpscatter_baseidx_sext_nxv8i8_nxv8bf16( %val, ptr %base, %idxs, %m, i32 zeroext %evl) { +; RV32-LABEL: vpscatter_baseidx_sext_nxv8i8_nxv8bf16: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma +; RV32-NEXT: vsext.vf4 v12, v10 +; RV32-NEXT: vadd.vv v12, v12, v12 +; RV32-NEXT: vsetvli zero, a1, e16, m2, ta, ma +; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: vpscatter_baseidx_sext_nxv8i8_nxv8bf16: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma +; RV64-NEXT: vsext.vf8 v16, v10 +; RV64-NEXT: vadd.vv v16, v16, v16 +; RV64-NEXT: vsetvli zero, a1, e16, m2, ta, ma +; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t +; RV64-NEXT: ret + %eidxs = sext %idxs to + %ptrs = getelementptr inbounds bfloat, ptr %base, %eidxs + call void @llvm.vp.scatter.nxv8bf16.nxv8p0( %val, %ptrs, %m, i32 %evl) + ret void +} + +define void @vpscatter_baseidx_zext_nxv8i8_nxv8bf16( %val, ptr %base, %idxs, %m, i32 zeroext %evl) { +; RV32-LABEL: vpscatter_baseidx_zext_nxv8i8_nxv8bf16: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a2, zero, e8, m1, ta, ma +; RV32-NEXT: vwaddu.vv v12, v10, v10 +; RV32-NEXT: vsetvli zero, a1, e16, m2, ta, ma +; RV32-NEXT: vsoxei16.v v8, (a0), v12, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: vpscatter_baseidx_zext_nxv8i8_nxv8bf16: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a2, zero, e8, m1, ta, ma +; RV64-NEXT: vwaddu.vv v12, 
v10, v10 +; RV64-NEXT: vsetvli zero, a1, e16, m2, ta, ma +; RV64-NEXT: vsoxei16.v v8, (a0), v12, v0.t +; RV64-NEXT: ret + %eidxs = zext %idxs to + %ptrs = getelementptr inbounds bfloat, ptr %base, %eidxs + call void @llvm.vp.scatter.nxv8bf16.nxv8p0( %val, %ptrs, %m, i32 %evl) + ret void +} + +define void @vpscatter_baseidx_nxv8bf16( %val, ptr %base, %idxs, %m, i32 zeroext %evl) { +; RV32-LABEL: vpscatter_baseidx_nxv8bf16: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a2, zero, e16, m2, ta, ma +; RV32-NEXT: vwadd.vv v12, v10, v10 +; RV32-NEXT: vsetvli zero, a1, e16, m2, ta, ma +; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: vpscatter_baseidx_nxv8bf16: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma +; RV64-NEXT: vsext.vf4 v16, v10 +; RV64-NEXT: vadd.vv v16, v16, v16 +; RV64-NEXT: vsetvli zero, a1, e16, m2, ta, ma +; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t +; RV64-NEXT: ret + %ptrs = getelementptr inbounds bfloat, ptr %base, %idxs + call void @llvm.vp.scatter.nxv8bf16.nxv8p0( %val, %ptrs, %m, i32 %evl) + ret void +} + declare void @llvm.vp.scatter.nxv1f16.nxv1p0(, , , i32) define void @vpscatter_nxv1f16( %val, %ptrs, %m, i32 zeroext %evl) { @@ -2115,10 +2298,10 @@ define void @vpscatter_nxv16f64( %val, ; RV32-NEXT: vl8re32.v v24, (a0) ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: mv a2, a1 -; RV32-NEXT: bltu a1, a0, .LBB99_2 +; RV32-NEXT: bltu a1, a0, .LBB108_2 ; RV32-NEXT: # %bb.1: ; RV32-NEXT: mv a2, a0 -; RV32-NEXT: .LBB99_2: +; RV32-NEXT: .LBB108_2: ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV32-NEXT: vsoxei32.v v8, (zero), v24, v0.t ; RV32-NEXT: sub a2, a1, a0 @@ -2148,10 +2331,10 @@ define void @vpscatter_nxv16f64( %val, ; RV64-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill ; RV64-NEXT: vl8re64.v v24, (a0) ; RV64-NEXT: mv a0, a2 -; RV64-NEXT: bltu a2, a1, .LBB99_2 +; RV64-NEXT: bltu a2, a1, .LBB108_2 ; RV64-NEXT: # %bb.1: ; RV64-NEXT: mv a0, a1 -; RV64-NEXT: .LBB99_2: +; RV64-NEXT: .LBB108_2: ; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV64-NEXT: vsoxei64.v v8, (zero), v24, v0.t ; RV64-NEXT: sub a0, a2, a1 @@ -2183,10 +2366,10 @@ define void @vpscatter_baseidx_nxv16i16_nxv16f64( %val, pt ; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: vsll.vi v24, v24, 3 ; RV32-NEXT: mv a3, a2 -; RV32-NEXT: bltu a2, a1, .LBB100_2 +; RV32-NEXT: bltu a2, a1, .LBB109_2 ; RV32-NEXT: # %bb.1: ; RV32-NEXT: mv a3, a1 -; RV32-NEXT: .LBB100_2: +; RV32-NEXT: .LBB109_2: ; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma ; RV32-NEXT: vsoxei32.v v8, (a0), v24, v0.t ; RV32-NEXT: sub a3, a2, a1 @@ -2223,10 +2406,10 @@ define void @vpscatter_baseidx_nxv16i16_nxv16f64( %val, pt ; RV64-NEXT: csrr a1, vlenb ; RV64-NEXT: vsll.vi v24, v16, 3 ; RV64-NEXT: mv a3, a2 -; RV64-NEXT: bltu a2, a1, .LBB100_2 +; RV64-NEXT: bltu a2, a1, .LBB109_2 ; RV64-NEXT: # %bb.1: ; RV64-NEXT: mv a3, a1 -; RV64-NEXT: .LBB100_2: +; RV64-NEXT: .LBB109_2: ; RV64-NEXT: vsetvli zero, a3, e64, m8, ta, ma ; RV64-NEXT: vsoxei64.v v8, (a0), v24, v0.t ; RV64-NEXT: sub a3, a2, a1 @@ -2264,10 +2447,10 @@ define void @vpscatter_baseidx_sext_nxv16i16_nxv16f64( %va ; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: vsll.vi v24, v24, 3 ; RV32-NEXT: mv a3, a2 -; RV32-NEXT: bltu a2, a1, .LBB101_2 +; RV32-NEXT: bltu a2, a1, .LBB110_2 ; RV32-NEXT: # %bb.1: ; RV32-NEXT: mv a3, a1 -; RV32-NEXT: .LBB101_2: +; RV32-NEXT: .LBB110_2: ; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma ; RV32-NEXT: vsoxei32.v v8, (a0), v24, v0.t ; RV32-NEXT: sub a3, a2, a1 @@ -2304,10 +2487,10 @@ define void @vpscatter_baseidx_sext_nxv16i16_nxv16f64( %va ; 
RV64-NEXT: csrr a1, vlenb ; RV64-NEXT: vsll.vi v24, v0, 3 ; RV64-NEXT: mv a3, a2 -; RV64-NEXT: bltu a2, a1, .LBB101_2 +; RV64-NEXT: bltu a2, a1, .LBB110_2 ; RV64-NEXT: # %bb.1: ; RV64-NEXT: mv a3, a1 -; RV64-NEXT: .LBB101_2: +; RV64-NEXT: .LBB110_2: ; RV64-NEXT: addi a4, sp, 16 ; RV64-NEXT: vl1r.v v0, (a4) # Unknown-size Folded Reload ; RV64-NEXT: vsetvli zero, a3, e64, m8, ta, ma @@ -2346,10 +2529,10 @@ define void @vpscatter_baseidx_zext_nxv16i16_nxv16f64( %va ; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: vsll.vi v24, v24, 3 ; RV32-NEXT: mv a3, a2 -; RV32-NEXT: bltu a2, a1, .LBB102_2 +; RV32-NEXT: bltu a2, a1, .LBB111_2 ; RV32-NEXT: # %bb.1: ; RV32-NEXT: mv a3, a1 -; RV32-NEXT: .LBB102_2: +; RV32-NEXT: .LBB111_2: ; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma ; RV32-NEXT: vsoxei32.v v8, (a0), v24, v0.t ; RV32-NEXT: sub a3, a2, a1 @@ -2371,10 +2554,10 @@ define void @vpscatter_baseidx_zext_nxv16i16_nxv16f64( %va ; RV64-NEXT: csrr a1, vlenb ; RV64-NEXT: vsll.vi v24, v24, 3 ; RV64-NEXT: mv a3, a2 -; RV64-NEXT: bltu a2, a1, .LBB102_2 +; RV64-NEXT: bltu a2, a1, .LBB111_2 ; RV64-NEXT: # %bb.1: ; RV64-NEXT: mv a3, a1 -; RV64-NEXT: .LBB102_2: +; RV64-NEXT: .LBB111_2: ; RV64-NEXT: vsetvli zero, a3, e64, m8, ta, ma ; RV64-NEXT: vsoxei32.v v8, (a0), v24, v0.t ; RV64-NEXT: sub a3, a2, a1 diff --git a/llvm/test/CodeGen/RISCV/rvv/vpstore.ll b/llvm/test/CodeGen/RISCV/rvv/vpstore.ll index 015d7645aaa29..d935e52149d20 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vpstore.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vpstore.ll @@ -1,7 +1,11 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v \ +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+zvfbfmin,+v \ ; RUN: -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v \ +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+zvfbfmin,+v \ +; RUN: -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfhmin,+zvfbfmin,+v \ +; RUN: -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfhmin,+zvfbfmin,+v \ ; RUN: -verify-machineinstrs < %s | FileCheck %s declare void @llvm.vp.store.nxv1i8.p0(, ptr, , i32) @@ -208,6 +212,54 @@ define void @vpstore_nxv8i64( %val, ptr %ptr, , ptr, , i32) + +define void @vpstore_nxv1bf16( %val, ptr %ptr, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpstore_nxv1bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma +; CHECK-NEXT: vse16.v v8, (a0), v0.t +; CHECK-NEXT: ret + call void @llvm.vp.store.nxv1bf16.p0( %val, ptr %ptr, %m, i32 %evl) + ret void +} + +declare void @llvm.vp.store.nxv2bf16.p0(, ptr, , i32) + +define void @vpstore_nxv2bf16( %val, ptr %ptr, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpstore_nxv2bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma +; CHECK-NEXT: vse16.v v8, (a0), v0.t +; CHECK-NEXT: ret + call void @llvm.vp.store.nxv2bf16.p0( %val, ptr %ptr, %m, i32 %evl) + ret void +} + +declare void @llvm.vp.store.nxv4bf16.p0(, ptr, , i32) + +define void @vpstore_nxv4bf16( %val, ptr %ptr, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpstore_nxv4bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma +; CHECK-NEXT: vse16.v v8, (a0), v0.t +; CHECK-NEXT: ret + call void @llvm.vp.store.nxv4bf16.p0( %val, ptr %ptr, %m, i32 %evl) + ret void +} + +declare void @llvm.vp.store.nxv8bf16.p0(, ptr, , i32) + +define void @vpstore_nxv8bf16( %val, ptr %ptr, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpstore_nxv8bf16: 
+; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma +; CHECK-NEXT: vse16.v v8, (a0), v0.t +; CHECK-NEXT: ret + call void @llvm.vp.store.nxv8bf16.p0( %val, ptr %ptr, %m, i32 %evl) + ret void +} + declare void @llvm.vp.store.nxv1f16.p0(, ptr, , i32) define void @vpstore_nxv1f16( %val, ptr %ptr, %m, i32 zeroext %evl) { @@ -369,10 +421,10 @@ define void @vpstore_nxv16f64( %val, ptr %ptr, %val, ptr %ptr, %val, ptr %ptr, Date: Wed, 25 Sep 2024 10:54:40 -0700 Subject: [PATCH 205/212] [clang] Fix FileManagerTest Compilation failure caused by b1aea98cfa357e23f4bb52232da5f41781f23bff. --- clang/unittests/Basic/FileManagerTest.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/clang/unittests/Basic/FileManagerTest.cpp b/clang/unittests/Basic/FileManagerTest.cpp index 53897322f6160..88d778fccd68e 100644 --- a/clang/unittests/Basic/FileManagerTest.cpp +++ b/clang/unittests/Basic/FileManagerTest.cpp @@ -255,8 +255,8 @@ TEST_F(FileManagerTest, getFileReturnsSameFileEntryForAliasedRealFiles) { EXPECT_EQ("abc/foo.cpp", r1->getName()); EXPECT_EQ("abc/bar.cpp", r2->getName()); - EXPECT_EQ((f1 ? *f1 : nullptr), &r1->getFileEntry()); - EXPECT_EQ((f2 ? *f2 : nullptr), &r2->getFileEntry()); + EXPECT_EQ((f1 ? &f1->getFileEntry() : nullptr), &r1->getFileEntry()); + EXPECT_EQ((f2 ? &f2->getFileEntry() : nullptr), &r2->getFileEntry()); } TEST_F(FileManagerTest, getFileRefReturnsCorrectNameForDifferentStatPath) { From 0f521931b85e6b5f798af357cf32a7ae782a848d Mon Sep 17 00:00:00 2001 From: gonzalobg <65027571+gonzalobg@users.noreply.github.com> Date: Wed, 25 Sep 2024 20:13:56 +0200 Subject: [PATCH 206/212] LLVMContext: add getSyncScopeName() to lookup individual scope name (#109484) This PR adds a `getSyncScopeString(Id)` API to `LLVMContext` that returns the `StringRef` for that ID, if any. --- llvm/include/llvm/IR/LLVMContext.h | 4 ++++ llvm/lib/IR/LLVMContext.cpp | 4 ++++ llvm/lib/IR/LLVMContextImpl.cpp | 10 ++++++++++ llvm/lib/IR/LLVMContextImpl.h | 4 ++++ llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 7 ++----- 5 files changed, 24 insertions(+), 5 deletions(-) diff --git a/llvm/include/llvm/IR/LLVMContext.h b/llvm/include/llvm/IR/LLVMContext.h index 558816e146587..6d4a59ba6b1f6 100644 --- a/llvm/include/llvm/IR/LLVMContext.h +++ b/llvm/include/llvm/IR/LLVMContext.h @@ -130,6 +130,10 @@ class LLVMContext { /// scope names are ordered by increasing synchronization scope IDs. void getSyncScopeNames(SmallVectorImpl &SSNs) const; + /// getSyncScopeName - Returns the name of a SyncScope::ID + /// registered with LLVMContext, if any. 
+  std::optional<StringRef> getSyncScopeName(SyncScope::ID Id) const;
+
   /// Define the GC for a function
   void setGC(const Function &Fn, std::string GCName);

diff --git a/llvm/lib/IR/LLVMContext.cpp b/llvm/lib/IR/LLVMContext.cpp
index 22e60772def43..e078527b597b4 100644
--- a/llvm/lib/IR/LLVMContext.cpp
+++ b/llvm/lib/IR/LLVMContext.cpp
@@ -330,6 +330,10 @@ void LLVMContext::getSyncScopeNames(SmallVectorImpl<StringRef> &SSNs) const {
   pImpl->getSyncScopeNames(SSNs);
 }

+std::optional<StringRef> LLVMContext::getSyncScopeName(SyncScope::ID Id) const {
+  return pImpl->getSyncScopeName(Id);
+}
+
 void LLVMContext::setGC(const Function &Fn, std::string GCName) {
   pImpl->GCNames[&Fn] = std::move(GCName);
 }

diff --git a/llvm/lib/IR/LLVMContextImpl.cpp b/llvm/lib/IR/LLVMContextImpl.cpp
index 4f1ef8cec3213..f2c965a45df3a 100644
--- a/llvm/lib/IR/LLVMContextImpl.cpp
+++ b/llvm/lib/IR/LLVMContextImpl.cpp
@@ -244,6 +244,16 @@ void LLVMContextImpl::getSyncScopeNames(
     SSNs[SSE.second] = SSE.first();
 }

+std::optional<StringRef>
+LLVMContextImpl::getSyncScopeName(SyncScope::ID Id) const {
+  for (const auto &SSE : SSC) {
+    if (SSE.second != Id)
+      continue;
+    return SSE.first();
+  }
+  return std::nullopt;
+}
+
 /// Gets the OptPassGate for this LLVMContextImpl, which defaults to the
 /// singleton OptBisect if not explicitly set.
 OptPassGate &LLVMContextImpl::getOptPassGate() const {

diff --git a/llvm/lib/IR/LLVMContextImpl.h b/llvm/lib/IR/LLVMContextImpl.h
index e76f004b590ef..971091f304061 100644
--- a/llvm/lib/IR/LLVMContextImpl.h
+++ b/llvm/lib/IR/LLVMContextImpl.h
@@ -1665,6 +1665,10 @@ class LLVMContextImpl {
   /// scope names are ordered by increasing synchronization scope IDs.
   void getSyncScopeNames(SmallVectorImpl<StringRef> &SSNs) const;

+  /// getSyncScopeName - Returns the name of a SyncScope::ID
+  /// registered with LLVMContext, if any.
+  std::optional<StringRef> getSyncScopeName(SyncScope::ID Id) const;
+
   /// Maintain the GC name for each function.
   ///
   /// This saves allocating an additional word in Function for programs which

diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 2464361d4eece..885ecab891b1f 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -16144,11 +16144,8 @@ static bool atomicIgnoresDenormalModeOrFPModeIsFTZ(const AtomicRMWInst *RMW) {

 static OptimizationRemark emitAtomicRMWLegalRemark(const AtomicRMWInst *RMW) {
   LLVMContext &Ctx = RMW->getContext();
-  SmallVector<StringRef> SSNs;
-  Ctx.getSyncScopeNames(SSNs);
-  StringRef MemScope = SSNs[RMW->getSyncScopeID()].empty()
-                           ? "system"
-                           : SSNs[RMW->getSyncScopeID()];
+  StringRef SS = Ctx.getSyncScopeName(RMW->getSyncScopeID()).value_or("");
+  StringRef MemScope = SS.empty() ? StringRef("system") : SS;

   return OptimizationRemark(DEBUG_TYPE, "Passed", RMW)
          << "Hardware instruction generated for atomic "

From c3334dad732e3a3a53e57c028bdb337766e01598 Mon Sep 17 00:00:00 2001
From: Chris Apple
Date: Wed, 25 Sep 2024 11:59:11 -0700
Subject: [PATCH 207/212] [rtsan] Add exit stats (#109885)

Adds the flag `print_stats_on_exit`, which mirrors nsan's flag of the
same name.

# Why?
Not only is this nice for end users, it also gives us a very trivial way
to test deduplication, which is next up.

Currently the style is something like:

```
RealtimeSanitizer exit stats:
    Total error count: 488
```
---
 compiler-rt/lib/rtsan/CMakeLists.txt  |  8 ++++--
 compiler-rt/lib/rtsan/rtsan.cpp       |  9 ++++++-
 compiler-rt/lib/rtsan/rtsan_flags.inc |  1 +
 compiler-rt/lib/rtsan/rtsan_stats.cpp | 35 +++++++++++++++++++++++++++
 compiler-rt/lib/rtsan/rtsan_stats.h   | 21 ++++++++++++++++
 compiler-rt/test/rtsan/exit_stats.cpp | 23 ++++++++++++++++++
 6 files changed, 94 insertions(+), 3 deletions(-)
 create mode 100644 compiler-rt/lib/rtsan/rtsan_stats.cpp
 create mode 100644 compiler-rt/lib/rtsan/rtsan_stats.h
 create mode 100644 compiler-rt/test/rtsan/exit_stats.cpp

diff --git a/compiler-rt/lib/rtsan/CMakeLists.txt b/compiler-rt/lib/rtsan/CMakeLists.txt
index b7e2362d31352..af34fb63cf53c 100644
--- a/compiler-rt/lib/rtsan/CMakeLists.txt
+++ b/compiler-rt/lib/rtsan/CMakeLists.txt
@@ -5,7 +5,9 @@ set(RTSAN_CXX_SOURCES
   rtsan_context.cpp
   rtsan_diagnostics.cpp
   rtsan_flags.cpp
-  rtsan_interceptors.cpp)
+  rtsan_interceptors.cpp
+  rtsan_stats.cpp
+  )

 set(RTSAN_PREINIT_SOURCES
   rtsan_preinit.cpp)
@@ -16,7 +18,9 @@ set(RTSAN_HEADERS
   rtsan_context.h
   rtsan_diagnostics.h
   rtsan_flags.h
-  rtsan_flags.inc)
+  rtsan_flags.inc
+  rtsan_stats.h
+  )

 set(RTSAN_DEPS)

diff --git a/compiler-rt/lib/rtsan/rtsan.cpp b/compiler-rt/lib/rtsan/rtsan.cpp
index f02e89421035c..87c3611935ee5 100644
--- a/compiler-rt/lib/rtsan/rtsan.cpp
+++ b/compiler-rt/lib/rtsan/rtsan.cpp
@@ -13,6 +13,7 @@
 #include "rtsan/rtsan_diagnostics.h"
 #include "rtsan/rtsan_flags.h"
 #include "rtsan/rtsan_interceptors.h"
+#include "rtsan/rtsan_stats.h"

 #include "sanitizer_common/sanitizer_atomic.h"
 #include "sanitizer_common/sanitizer_common.h"
@@ -46,7 +47,10 @@ static InitializationState GetInitializationState() {

 static auto OnViolationAction(DiagnosticsInfo info) {
   return [info]() {
-    __rtsan::PrintDiagnostics(info);
+    IncrementTotalErrorCount();
+
+    PrintDiagnostics(info);
+
     if (flags().halt_on_error)
       Die();
   };
@@ -62,6 +66,9 @@ SANITIZER_INTERFACE_ATTRIBUTE void __rtsan_init() {
   InitializeFlags();
   InitializeInterceptors();

+  if (flags().print_stats_on_exit)
+    Atexit(PrintStatisticsSummary);
+
   SetInitializationState(InitializationState::Initialized);
 }

diff --git a/compiler-rt/lib/rtsan/rtsan_flags.inc b/compiler-rt/lib/rtsan/rtsan_flags.inc
index 25d62cf0a60fb..1df71127d19d3 100644
--- a/compiler-rt/lib/rtsan/rtsan_flags.inc
+++ b/compiler-rt/lib/rtsan/rtsan_flags.inc
@@ -17,3 +17,4 @@
 // See COMMON_FLAG in sanitizer_flags.inc for more details.

 RTSAN_FLAG(bool, halt_on_error, true, "Exit after first reported error.")
+RTSAN_FLAG(bool, print_stats_on_exit, false, "Print stats on exit.")

diff --git a/compiler-rt/lib/rtsan/rtsan_stats.cpp b/compiler-rt/lib/rtsan/rtsan_stats.cpp
new file mode 100644
index 0000000000000..7c1ccf2876f08
--- /dev/null
+++ b/compiler-rt/lib/rtsan/rtsan_stats.cpp
@@ -0,0 +1,35 @@
+//===--- rtsan_stats.cpp - Realtime Sanitizer -------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Part of the RealtimeSanitizer runtime library
+//
+//===----------------------------------------------------------------------===//
+
+#include "rtsan/rtsan_stats.h"
+
+#include "sanitizer_common/sanitizer_atomic.h"
+#include "sanitizer_common/sanitizer_common.h"
+
+using namespace __sanitizer;
+using namespace __rtsan;
+
+static atomic_uint32_t rtsan_total_error_count{0};
+
+void __rtsan::IncrementTotalErrorCount() {
+  atomic_fetch_add(&rtsan_total_error_count, 1, memory_order_relaxed);
+}
+
+static u32 GetTotalErrorCount() {
+  return atomic_load(&rtsan_total_error_count, memory_order_relaxed);
+}
+
+void __rtsan::PrintStatisticsSummary() {
+  ScopedErrorReportLock l;
+  Printf("RealtimeSanitizer exit stats:\n");
+  Printf("    Total error count: %u\n", GetTotalErrorCount());
+}

diff --git a/compiler-rt/lib/rtsan/rtsan_stats.h b/compiler-rt/lib/rtsan/rtsan_stats.h
new file mode 100644
index 0000000000000..3aa30f6a5db76
--- /dev/null
+++ b/compiler-rt/lib/rtsan/rtsan_stats.h
@@ -0,0 +1,21 @@
+//===--- rtsan_stats.h - Realtime Sanitizer ---------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Part of the RealtimeSanitizer runtime library
+//
+//===----------------------------------------------------------------------===//
+
+#pragma once
+
+namespace __rtsan {
+
+void IncrementTotalErrorCount();
+
+void PrintStatisticsSummary();
+
+} // namespace __rtsan

diff --git a/compiler-rt/test/rtsan/exit_stats.cpp b/compiler-rt/test/rtsan/exit_stats.cpp
new file mode 100644
index 0000000000000..b46a0fd62bac1
--- /dev/null
+++ b/compiler-rt/test/rtsan/exit_stats.cpp
@@ -0,0 +1,23 @@
+// RUN: %clangxx -fsanitize=realtime %s -o %t
+// RUN: env RTSAN_OPTIONS="halt_on_error=false,print_stats_on_exit=true" %run %t 2>&1 | FileCheck %s
+
+// UNSUPPORTED: ios
+
+// Intent: Ensure exit stats are printed on exit.
+
+#include <unistd.h>
+
+void violation() [[clang::nonblocking]] {
+  const int kNumViolations = 10;
+  for (int i = 0; i < kNumViolations; i++) {
+    usleep(1);
+  }
+}
+
+int main() {
+  violation();
+  return 0;
+}
+
+// CHECK: RealtimeSanitizer exit stats:
+// CHECK-NEXT: Total error count: 10

From c59ecbee202fbf2092aec8b96d894bfe95e09af2 Mon Sep 17 00:00:00 2001
From: Chris Apple
Date: Sun, 22 Sep 2024 18:35:00 -0600
Subject: [PATCH 208/212] [rtsan] Only print out unique stack traces

---
 compiler-rt/lib/rtsan/rtsan.cpp               | 17 ++++++++-
 compiler-rt/lib/rtsan/rtsan_diagnostics.cpp   |  8 ----
 compiler-rt/lib/rtsan/rtsan_stats.cpp         | 10 +++++
 compiler-rt/lib/rtsan/rtsan_stats.h           |  1 +
 compiler-rt/test/rtsan/deduplicate_errors.cpp | 37 +++++++++++++++++++
 compiler-rt/test/rtsan/exit_stats.cpp         |  1 +
 6 files changed, 65 insertions(+), 9 deletions(-)
 create mode 100644 compiler-rt/test/rtsan/deduplicate_errors.cpp

diff --git a/compiler-rt/lib/rtsan/rtsan.cpp b/compiler-rt/lib/rtsan/rtsan.cpp
index 87c3611935ee5..2a6cf325b02d5 100644
--- a/compiler-rt/lib/rtsan/rtsan.cpp
+++ b/compiler-rt/lib/rtsan/rtsan.cpp
@@ -18,6 +18,7 @@
 #include "sanitizer_common/sanitizer_atomic.h"
 #include "sanitizer_common/sanitizer_common.h"
 #include "sanitizer_common/sanitizer_mutex.h"
+#include "sanitizer_common/sanitizer_stackdepot.h"
 #include "sanitizer_common/sanitizer_stacktrace.h"

 using namespace __rtsan;
@@ -49,7 +50,21 @@ static auto OnViolationAction(DiagnosticsInfo info) {
   return [info]() {
     IncrementTotalErrorCount();

-    PrintDiagnostics(info);
+    BufferedStackTrace stack;
+    stack.Unwind(info.pc, info.bp, nullptr,
+                 /*request_fast*/ true);
+
+    StackDepotHandle handle = StackDepotPut_WithHandle(stack);
+
+    const bool is_stack_novel = handle.use_count() == 0;
+    if (UNLIKELY(is_stack_novel)) {
+      IncrementUniqueErrorCount();
+
+      PrintDiagnostics(info);
+      stack.Print();
+
+      handle.inc_use_count_unsafe();
+    }

     if (flags().halt_on_error)
       Die();
   };

diff --git a/compiler-rt/lib/rtsan/rtsan_diagnostics.cpp b/compiler-rt/lib/rtsan/rtsan_diagnostics.cpp
index f82001f5b2057..cfe71481d3dc7 100644
--- a/compiler-rt/lib/rtsan/rtsan_diagnostics.cpp
+++ b/compiler-rt/lib/rtsan/rtsan_diagnostics.cpp
@@ -39,13 +39,6 @@ class Decorator : public __sanitizer::SanitizerCommonDecorator {
 };
 } // namespace

-static void PrintStackTrace(uptr pc, uptr bp) {
-  BufferedStackTrace stack{};
-
-  stack.Unwind(pc, bp, nullptr, common_flags()->fast_unwind_on_fatal);
-  stack.Print();
-}
-
 static void PrintError(const Decorator &decorator,
                        const DiagnosticsInfo &info) {
   const auto ErrorTypeStr = [&info]() -> const char * {
@@ -91,5 +84,4 @@ void __rtsan::PrintDiagnostics(const DiagnosticsInfo &info) {
   PrintError(d, info);
   PrintReason(d, info);
   Printf("%s", d.Default());
-  PrintStackTrace(info.pc, info.bp);
 }

diff --git a/compiler-rt/lib/rtsan/rtsan_stats.cpp b/compiler-rt/lib/rtsan/rtsan_stats.cpp
index 7c1ccf2876f08..dac7b23c3ef52 100644
--- a/compiler-rt/lib/rtsan/rtsan_stats.cpp
+++ b/compiler-rt/lib/rtsan/rtsan_stats.cpp
@@ -19,17 +19,27 @@ using namespace __sanitizer;
 using namespace __rtsan;

 static atomic_uint32_t rtsan_total_error_count{0};
+static atomic_uint32_t rtsan_unique_error_count{0};

 void __rtsan::IncrementTotalErrorCount() {
   atomic_fetch_add(&rtsan_total_error_count, 1, memory_order_relaxed);
 }

+void __rtsan::IncrementUniqueErrorCount() {
+  atomic_fetch_add(&rtsan_unique_error_count, 1, memory_order_relaxed);
+}
+
 static u32 GetTotalErrorCount() {
   return atomic_load(&rtsan_total_error_count, memory_order_relaxed);
 }

+static u32 GetUniqueErrorCount() {
+  return atomic_load(&rtsan_unique_error_count, memory_order_relaxed);
+}
+
 void __rtsan::PrintStatisticsSummary() {
   ScopedErrorReportLock l;
   Printf("RealtimeSanitizer exit stats:\n");
   Printf("    Total error count: %u\n", GetTotalErrorCount());
+  Printf("    Unique error count: %u\n", GetUniqueErrorCount());
 }

diff --git a/compiler-rt/lib/rtsan/rtsan_stats.h b/compiler-rt/lib/rtsan/rtsan_stats.h
index 3aa30f6a5db76..a72098792c89c 100644
--- a/compiler-rt/lib/rtsan/rtsan_stats.h
+++ b/compiler-rt/lib/rtsan/rtsan_stats.h
@@ -15,6 +15,7 @@
 namespace __rtsan {

 void IncrementTotalErrorCount();
+void IncrementUniqueErrorCount();

 void PrintStatisticsSummary();

diff --git a/compiler-rt/test/rtsan/deduplicate_errors.cpp b/compiler-rt/test/rtsan/deduplicate_errors.cpp
new file mode 100644
index 0000000000000..224a2215ac7cb
--- /dev/null
+++ b/compiler-rt/test/rtsan/deduplicate_errors.cpp
@@ -0,0 +1,37 @@
+// RUN: %clangxx -fsanitize=realtime %s -o %t
+// RUN: env RTSAN_OPTIONS="halt_on_error=false,print_stats_on_exit=true" %run %t 2>&1 | FileCheck %s
+// RUN: env RTSAN_OPTIONS="halt_on_error=false" %run %t 2>&1 | grep "==ERROR: RealtimeSanitizer:" | wc -l | awk '{exit $1 != 4}'
+
+// UNSUPPORTED: ios
+
+// Intent: Ensure all errors are deduplicated.
+
+#include <unistd.h>
+
+const int kNumViolations = 10;
+
+void violation() [[clang::nonblocking]] {
+  for (int i = 0; i < kNumViolations; i++)
+    usleep(1);
+}
+
+void violation2() [[clang::nonblocking]] {
+  for (int i = 0; i < kNumViolations; i++)
+    violation();
+}
+
+void double_violation() [[clang::nonblocking]] {
+  violation();
+  violation2();
+}
+
+int main() {
+  violation();        // 1 unique error here, but 10 total
+  violation2();       // 1 unique error here, but 100 total
+  double_violation(); // 2 unique errors here, but 110 total
+  return 0;
+}
+
+// CHECK: RealtimeSanitizer exit stats:
+// CHECK-NEXT: Total error count: 220
+// CHECK-NEXT: Unique error count: 4

diff --git a/compiler-rt/test/rtsan/exit_stats.cpp b/compiler-rt/test/rtsan/exit_stats.cpp
index b46a0fd62bac1..4341fbb0f9cf2 100644
--- a/compiler-rt/test/rtsan/exit_stats.cpp
+++ b/compiler-rt/test/rtsan/exit_stats.cpp
@@ -21,3 +21,4 @@ int main() {

 // CHECK: RealtimeSanitizer exit stats:
 // CHECK-NEXT: Total error count: 10
+// CHECK-NEXT: Unique error count: 1

From 0328607de1750f8581697b84a836c98bd534071b Mon Sep 17 00:00:00 2001
From: Chris Apple
Date: Wed, 25 Sep 2024 13:10:57 -0700
Subject: [PATCH 209/212] [PR] fmayer - Add comment about UNLIKELY

---
 compiler-rt/lib/rtsan/rtsan.cpp | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/compiler-rt/lib/rtsan/rtsan.cpp b/compiler-rt/lib/rtsan/rtsan.cpp
index 2a6cf325b02d5..5f44025cb7a47 100644
--- a/compiler-rt/lib/rtsan/rtsan.cpp
+++ b/compiler-rt/lib/rtsan/rtsan.cpp
@@ -57,6 +57,10 @@ static auto OnViolationAction(DiagnosticsInfo info) {
     StackDepotHandle handle = StackDepotPut_WithHandle(stack);

     const bool is_stack_novel = handle.use_count() == 0;
+
+    // Marked UNLIKELY because, if the user is running with halt_on_error=false,
+    // we expect a high number of duplicate stacks. We are willing
+    // to pay for the first insertion.
     if (UNLIKELY(is_stack_novel)) {
       IncrementUniqueErrorCount();

From 9a41bf771b2a6d40c8dc3371aed6151f5b2ece0f Mon Sep 17 00:00:00 2001
From: Chris Apple
Date: Wed, 25 Sep 2024 16:10:44 -0700
Subject: [PATCH 210/212] [PR] fmayer - fast unwind on fatal

---
 compiler-rt/lib/rtsan/rtsan.cpp | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/compiler-rt/lib/rtsan/rtsan.cpp b/compiler-rt/lib/rtsan/rtsan.cpp
index 5f44025cb7a47..6fcff5e326a52 100644
--- a/compiler-rt/lib/rtsan/rtsan.cpp
+++ b/compiler-rt/lib/rtsan/rtsan.cpp
@@ -51,9 +51,14 @@ static auto OnViolationAction(DiagnosticsInfo info) {
     IncrementTotalErrorCount();

     BufferedStackTrace stack;
+
+    // We use the unwind_on_fatal flag here because of precedent with other
+    // sanitizers; this action is not necessarily fatal if halt_on_error=false.
     stack.Unwind(info.pc, info.bp, nullptr,
-                 /*request_fast*/ true);
+                 common_flags()->fast_unwind_on_fatal);

+    // If in the future we interop with other sanitizers, we will
+    // need to make our own stack depot.
     StackDepotHandle handle = StackDepotPut_WithHandle(stack);

     const bool is_stack_novel = handle.use_count() == 0;

From 31d6232d7ccaf36148c449cfafdbc2b74ce185c5 Mon Sep 17 00:00:00 2001
From: Chris Apple
Date: Wed, 25 Sep 2024 16:38:49 -0700
Subject: [PATCH 211/212] [PR] fmayer - CHECK for counting instead of wc

---
 compiler-rt/test/rtsan/deduplicate_errors.cpp | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/compiler-rt/test/rtsan/deduplicate_errors.cpp b/compiler-rt/test/rtsan/deduplicate_errors.cpp
index 224a2215ac7cb..43f80334981c2 100644
--- a/compiler-rt/test/rtsan/deduplicate_errors.cpp
+++ b/compiler-rt/test/rtsan/deduplicate_errors.cpp
@@ -1,6 +1,5 @@
 // RUN: %clangxx -fsanitize=realtime %s -o %t
 // RUN: env RTSAN_OPTIONS="halt_on_error=false,print_stats_on_exit=true" %run %t 2>&1 | FileCheck %s
-// RUN: env RTSAN_OPTIONS="halt_on_error=false" %run %t 2>&1 | grep "==ERROR: RealtimeSanitizer:" | wc -l | awk '{exit $1 != 4}'

 // UNSUPPORTED: ios

@@ -32,6 +31,13 @@ int main() {
   return 0;
 }

+// CHECK: ==ERROR:
+// CHECK: ==ERROR:
+// CHECK: ==ERROR:
+// CHECK: ==ERROR:
+
+// CHECK-NOT: ==ERROR:
+
 // CHECK: RealtimeSanitizer exit stats:

From 831e03604580800ae743069a1b0ca79d7c735597 Mon Sep 17 00:00:00 2001
From: Chris Apple
Date: Wed, 25 Sep 2024 17:23:16 -0700
Subject: [PATCH 212/212] [PR] ilovepi - CHECK-COUNT

---
 compiler-rt/test/rtsan/deduplicate_errors.cpp | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/compiler-rt/test/rtsan/deduplicate_errors.cpp b/compiler-rt/test/rtsan/deduplicate_errors.cpp
index 43f80334981c2..7d60d4d7da7dd 100644
--- a/compiler-rt/test/rtsan/deduplicate_errors.cpp
+++ b/compiler-rt/test/rtsan/deduplicate_errors.cpp
@@ -31,11 +31,7 @@ int main() {
   return 0;
 }

-// CHECK: ==ERROR:
-// CHECK: ==ERROR:
-// CHECK: ==ERROR:
-// CHECK: ==ERROR:
-
+// CHECK-COUNT-4: ==ERROR:
 // CHECK-NOT: ==ERROR:

 // CHECK: RealtimeSanitizer exit stats:
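
Taken together, patches 207-212 implement one small scheme: every violation bumps a total counter, but a report is printed (and a unique counter bumped) only when the violation's stack trace is novel. The following standalone sketch shows that pattern in isolation. It is illustrative only: it substitutes a std::unordered_set keyed by a caller-supplied stack hash for sanitizer_common's StackDepot and BufferedStackTrace, and the names ReportIfNovel, PrintExitStats, total_errors, and unique_errors are invented for this sketch, not part of the rtsan API.

```
#include <atomic>
#include <cstddef>
#include <cstdio>
#include <mutex>
#include <unordered_set>

static std::atomic<unsigned> total_errors{0};
static std::atomic<unsigned> unique_errors{0};

static std::mutex seen_mutex;
static std::unordered_set<std::size_t> seen_stacks; // stand-in for the stack depot

// Always bump the total count; print a report only the first time a given
// stack hash is seen (mirroring the handle.use_count() == 0 check above).
static void ReportIfNovel(std::size_t stack_hash) {
  total_errors.fetch_add(1, std::memory_order_relaxed);

  bool is_novel = false;
  {
    std::lock_guard<std::mutex> lock(seen_mutex);
    is_novel = seen_stacks.insert(stack_hash).second;
  }

  if (is_novel) {
    unique_errors.fetch_add(1, std::memory_order_relaxed);
    std::printf("==ERROR: RealtimeSanitizer: (stack %zu)\n", stack_hash);
  }
}

// Mirrors PrintStatisticsSummary, which rtsan registers via Atexit when
// print_stats_on_exit is set.
static void PrintExitStats() {
  std::printf("RealtimeSanitizer exit stats:\n");
  std::printf("    Total error count: %u\n", total_errors.load());
  std::printf("    Unique error count: %u\n", unique_errors.load());
}

int main() {
  for (int i = 0; i < 10; ++i)
    ReportIfNovel(42); // same "stack" ten times -> one report
  ReportIfNovel(7);    // a novel stack -> second report

  PrintExitStats(); // Total error count: 11, Unique error count: 2
  return 0;
}
```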